| Line Number |
../DebugInfoTest/example_mips_dbg.ll
BUT NOT
../DebugInfoTest/example_mips.ll
|
Line Number |
../DebugInfoTest/example_mips.ll
BUT NOT
../DebugInfoTest/example_mips_dbg.ll
|
| 1 |
//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// |
1 |
//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// |
| 2 |
// |
2 |
// |
| 3 |
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
3 |
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 |
// See https://llvm.org/LICENSE.txt for license information. |
4 |
// See https://llvm.org/LICENSE.txt for license information. |
| 5 |
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
5 |
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 |
// |
6 |
// |
| 7 |
//===----------------------------------------------------------------------===// |
7 |
//===----------------------------------------------------------------------===// |
| 8 |
// |
8 |
// |
| 9 |
// This file defines an instruction selector for the AArch64 target. |
9 |
// This file defines an instruction selector for the AArch64 target. |
| 10 |
// |
10 |
// |
| 11 |
//===----------------------------------------------------------------------===// |
11 |
//===----------------------------------------------------------------------===// |
| 12 |
|
12 |
|
| 13 |
#include "AArch64MachineFunctionInfo.h" |
13 |
#include "AArch64MachineFunctionInfo.h" |
| 14 |
#include "AArch64TargetMachine.h" |
14 |
#include "AArch64TargetMachine.h" |
| 15 |
#include "MCTargetDesc/AArch64AddressingModes.h" |
15 |
#include "MCTargetDesc/AArch64AddressingModes.h" |
| 16 |
#include "llvm/ADT/APSInt.h" |
16 |
#include "llvm/ADT/APSInt.h" |
| 17 |
#include "llvm/CodeGen/ISDOpcodes.h" |
17 |
#include "llvm/CodeGen/ISDOpcodes.h" |
| 18 |
#include "llvm/CodeGen/SelectionDAGISel.h" |
18 |
#include "llvm/CodeGen/SelectionDAGISel.h" |
| 19 |
#include "llvm/IR/Function.h" // To access function attributes. |
19 |
#include "llvm/IR/Function.h" // To access function attributes. |
| 20 |
#include "llvm/IR/GlobalValue.h" |
20 |
#include "llvm/IR/GlobalValue.h" |
| 21 |
#include "llvm/IR/Intrinsics.h" |
21 |
#include "llvm/IR/Intrinsics.h" |
| 22 |
#include "llvm/IR/IntrinsicsAArch64.h" |
22 |
#include "llvm/IR/IntrinsicsAArch64.h" |
| 23 |
#include "llvm/Support/Debug.h" |
23 |
#include "llvm/Support/Debug.h" |
| 24 |
#include "llvm/Support/ErrorHandling.h" |
24 |
#include "llvm/Support/ErrorHandling.h" |
| 25 |
#include "llvm/Support/KnownBits.h" |
25 |
#include "llvm/Support/KnownBits.h" |
| 26 |
#include "llvm/Support/MathExtras.h" |
26 |
#include "llvm/Support/MathExtras.h" |
| 27 |
#include "llvm/Support/raw_ostream.h" |
27 |
#include "llvm/Support/raw_ostream.h" |
| 28 |
|
28 |
|
| 29 |
using namespace llvm; |
29 |
using namespace llvm; |
| 30 |
|
30 |
|
| 31 |
#define DEBUG_TYPE "aarch64-isel" |
31 |
#define DEBUG_TYPE "aarch64-isel" |
| 32 |
#define PASS_NAME "AArch64 Instruction Selection" |
32 |
#define PASS_NAME "AArch64 Instruction Selection" |
| 33 |
|
33 |
|
| 34 |
//===--------------------------------------------------------------------===// |
34 |
//===--------------------------------------------------------------------===// |
| 35 |
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine |
35 |
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine |
| 36 |
/// instructions for SelectionDAG operations. |
36 |
/// instructions for SelectionDAG operations. |
| 37 |
/// |
37 |
/// |
| 38 |
namespace { |
38 |
namespace { |
| 39 |
|
39 |
|
| 40 |
class AArch64DAGToDAGISel : public SelectionDAGISel { |
40 |
class AArch64DAGToDAGISel : public SelectionDAGISel { |
| 41 |
|
41 |
|
| 42 |
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can |
42 |
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can |
| 43 |
/// make the right decision when generating code for different targets. |
43 |
/// make the right decision when generating code for different targets. |
| 44 |
const AArch64Subtarget *Subtarget; |
44 |
const AArch64Subtarget *Subtarget; |
| 45 |
|
45 |
|
| 46 |
public: |
46 |
public: |
| 47 |
static char ID; |
47 |
static char ID; |
| 48 |
|
48 |
|
| 49 |
AArch64DAGToDAGISel() = delete; |
49 |
AArch64DAGToDAGISel() = delete; |
| 50 |
|
50 |
|
| 51 |
explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, |
51 |
explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, |
| 52 |
CodeGenOpt::Level OptLevel) |
52 |
CodeGenOpt::Level OptLevel) |
| 53 |
: SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {} |
53 |
: SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {} |
| 54 |
|
54 |
|
| 55 |
bool runOnMachineFunction(MachineFunction &MF) override { |
55 |
bool runOnMachineFunction(MachineFunction &MF) override { |
| 56 |
Subtarget = &MF.getSubtarget(); |
56 |
Subtarget = &MF.getSubtarget(); |
| 57 |
return SelectionDAGISel::runOnMachineFunction(MF); |
57 |
return SelectionDAGISel::runOnMachineFunction(MF); |
| 58 |
} |
58 |
} |
| 59 |
|
59 |
|
| 60 |
void Select(SDNode *Node) override; |
60 |
void Select(SDNode *Node) override; |
| 61 |
|
61 |
|
| 62 |
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for |
62 |
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for |
| 63 |
/// inline asm expressions. |
63 |
/// inline asm expressions. |
| 64 |
bool SelectInlineAsmMemoryOperand(const SDValue &Op, |
64 |
bool SelectInlineAsmMemoryOperand(const SDValue &Op, |
| 65 |
unsigned ConstraintID, |
65 |
unsigned ConstraintID, |
| 66 |
std::vector &OutOps) override; |
66 |
std::vector &OutOps) override; |
| 67 |
|
67 |
|
| 68 |
template |
68 |
template |
| 69 |
bool SelectRDVLImm(SDValue N, SDValue &Imm); |
69 |
bool SelectRDVLImm(SDValue N, SDValue &Imm); |
| 70 |
|
70 |
|
| 71 |
bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); |
71 |
bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); |
| 72 |
bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift); |
72 |
bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift); |
| 73 |
bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); |
73 |
bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); |
| 74 |
bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); |
74 |
bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); |
| 75 |
bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { |
75 |
bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { |
| 76 |
return SelectShiftedRegister(N, false, Reg, Shift); |
76 |
return SelectShiftedRegister(N, false, Reg, Shift); |
| 77 |
} |
77 |
} |
| 78 |
bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { |
78 |
bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { |
| 79 |
return SelectShiftedRegister(N, true, Reg, Shift); |
79 |
return SelectShiftedRegister(N, true, Reg, Shift); |
| 80 |
} |
80 |
} |
| 81 |
bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { |
81 |
bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 82 |
return SelectAddrModeIndexed7S(N, 1, Base, OffImm); |
82 |
return SelectAddrModeIndexed7S(N, 1, Base, OffImm); |
| 83 |
} |
83 |
} |
| 84 |
bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { |
84 |
bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 85 |
return SelectAddrModeIndexed7S(N, 2, Base, OffImm); |
85 |
return SelectAddrModeIndexed7S(N, 2, Base, OffImm); |
| 86 |
} |
86 |
} |
| 87 |
bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { |
87 |
bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 88 |
return SelectAddrModeIndexed7S(N, 4, Base, OffImm); |
88 |
return SelectAddrModeIndexed7S(N, 4, Base, OffImm); |
| 89 |
} |
89 |
} |
| 90 |
bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { |
90 |
bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 91 |
return SelectAddrModeIndexed7S(N, 8, Base, OffImm); |
91 |
return SelectAddrModeIndexed7S(N, 8, Base, OffImm); |
| 92 |
} |
92 |
} |
| 93 |
bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { |
93 |
bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 94 |
return SelectAddrModeIndexed7S(N, 16, Base, OffImm); |
94 |
return SelectAddrModeIndexed7S(N, 16, Base, OffImm); |
| 95 |
} |
95 |
} |
| 96 |
bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { |
96 |
bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 97 |
return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); |
97 |
return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); |
| 98 |
} |
98 |
} |
| 99 |
bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { |
99 |
bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 100 |
return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); |
100 |
return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); |
| 101 |
} |
101 |
} |
| 102 |
bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { |
102 |
bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 103 |
return SelectAddrModeIndexed(N, 1, Base, OffImm); |
103 |
return SelectAddrModeIndexed(N, 1, Base, OffImm); |
| 104 |
} |
104 |
} |
| 105 |
bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { |
105 |
bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 106 |
return SelectAddrModeIndexed(N, 2, Base, OffImm); |
106 |
return SelectAddrModeIndexed(N, 2, Base, OffImm); |
| 107 |
} |
107 |
} |
| 108 |
bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { |
108 |
bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 109 |
return SelectAddrModeIndexed(N, 4, Base, OffImm); |
109 |
return SelectAddrModeIndexed(N, 4, Base, OffImm); |
| 110 |
} |
110 |
} |
| 111 |
bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { |
111 |
bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 112 |
return SelectAddrModeIndexed(N, 8, Base, OffImm); |
112 |
return SelectAddrModeIndexed(N, 8, Base, OffImm); |
| 113 |
} |
113 |
} |
| 114 |
bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { |
114 |
bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 115 |
return SelectAddrModeIndexed(N, 16, Base, OffImm); |
115 |
return SelectAddrModeIndexed(N, 16, Base, OffImm); |
| 116 |
} |
116 |
} |
| 117 |
bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { |
117 |
bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 118 |
return SelectAddrModeUnscaled(N, 1, Base, OffImm); |
118 |
return SelectAddrModeUnscaled(N, 1, Base, OffImm); |
| 119 |
} |
119 |
} |
| 120 |
bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { |
120 |
bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 121 |
return SelectAddrModeUnscaled(N, 2, Base, OffImm); |
121 |
return SelectAddrModeUnscaled(N, 2, Base, OffImm); |
| 122 |
} |
122 |
} |
| 123 |
bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { |
123 |
bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 124 |
return SelectAddrModeUnscaled(N, 4, Base, OffImm); |
124 |
return SelectAddrModeUnscaled(N, 4, Base, OffImm); |
| 125 |
} |
125 |
} |
| 126 |
bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { |
126 |
bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 127 |
return SelectAddrModeUnscaled(N, 8, Base, OffImm); |
127 |
return SelectAddrModeUnscaled(N, 8, Base, OffImm); |
| 128 |
} |
128 |
} |
| 129 |
bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { |
129 |
bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 130 |
return SelectAddrModeUnscaled(N, 16, Base, OffImm); |
130 |
return SelectAddrModeUnscaled(N, 16, Base, OffImm); |
| 131 |
} |
131 |
} |
| 132 |
template |
132 |
template |
| 133 |
bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) { |
133 |
bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) { |
| 134 |
// Test if there is an appropriate addressing mode and check if the |
134 |
// Test if there is an appropriate addressing mode and check if the |
| 135 |
// immediate fits. |
135 |
// immediate fits. |
| 136 |
bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm); |
136 |
bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm); |
| 137 |
if (Found) { |
137 |
if (Found) { |
| 138 |
if (auto *CI = dyn_cast(OffImm)) { |
138 |
if (auto *CI = dyn_cast(OffImm)) { |
| 139 |
int64_t C = CI->getSExtValue(); |
139 |
int64_t C = CI->getSExtValue(); |
| 140 |
if (C <= Max) |
140 |
if (C <= Max) |
| 141 |
return true; |
141 |
return true; |
| 142 |
} |
142 |
} |
| 143 |
} |
143 |
} |
| 144 |
|
144 |
|
| 145 |
// Otherwise, base only, materialize address in register. |
145 |
// Otherwise, base only, materialize address in register. |
| 146 |
Base = N; |
146 |
Base = N; |
| 147 |
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); |
147 |
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); |
| 148 |
return true; |
148 |
return true; |
| 149 |
} |
149 |
} |
| 150 |
|
150 |
|
| 151 |
template |
151 |
template |
| 152 |
bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, |
152 |
bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, |
| 153 |
SDValue &SignExtend, SDValue &DoShift) { |
153 |
SDValue &SignExtend, SDValue &DoShift) { |
| 154 |
return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); |
154 |
return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); |
| 155 |
} |
155 |
} |
| 156 |
|
156 |
|
| 157 |
template |
157 |
template |
| 158 |
bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, |
158 |
bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, |
| 159 |
SDValue &SignExtend, SDValue &DoShift) { |
159 |
SDValue &SignExtend, SDValue &DoShift) { |
| 160 |
return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); |
160 |
return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); |
| 161 |
} |
161 |
} |
| 162 |
|
162 |
|
| 163 |
bool SelectExtractHigh(SDValue N, SDValue &Res) { |
163 |
bool SelectExtractHigh(SDValue N, SDValue &Res) { |
| 164 |
if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST) |
164 |
if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST) |
| 165 |
N = N->getOperand(0); |
165 |
N = N->getOperand(0); |
| 166 |
if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR || |
166 |
if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR || |
| 167 |
!isa(N->getOperand(1))) |
167 |
!isa(N->getOperand(1))) |
| 168 |
return false; |
168 |
return false; |
| 169 |
EVT VT = N->getValueType(0); |
169 |
EVT VT = N->getValueType(0); |
| 170 |
EVT LVT = N->getOperand(0).getValueType(); |
170 |
EVT LVT = N->getOperand(0).getValueType(); |
| 171 |
unsigned Index = N->getConstantOperandVal(1); |
171 |
unsigned Index = N->getConstantOperandVal(1); |
| 172 |
if (!VT.is64BitVector() || !LVT.is128BitVector() || |
172 |
if (!VT.is64BitVector() || !LVT.is128BitVector() || |
| 173 |
Index != VT.getVectorNumElements()) |
173 |
Index != VT.getVectorNumElements()) |
| 174 |
return false; |
174 |
return false; |
| 175 |
Res = N->getOperand(0); |
175 |
Res = N->getOperand(0); |
| 176 |
return true; |
176 |
return true; |
| 177 |
} |
177 |
} |
| 178 |
|
178 |
|
| 179 |
bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) { |
179 |
bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) { |
| 180 |
if (N.getOpcode() != AArch64ISD::VLSHR) |
180 |
if (N.getOpcode() != AArch64ISD::VLSHR) |
| 181 |
return false; |
181 |
return false; |
| 182 |
SDValue Op = N->getOperand(0); |
182 |
SDValue Op = N->getOperand(0); |
| 183 |
EVT VT = Op.getValueType(); |
183 |
EVT VT = Op.getValueType(); |
| 184 |
unsigned ShtAmt = N->getConstantOperandVal(1); |
184 |
unsigned ShtAmt = N->getConstantOperandVal(1); |
| 185 |
if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD) |
185 |
if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD) |
| 186 |
return false; |
186 |
return false; |
| 187 |
|
187 |
|
| 188 |
APInt Imm; |
188 |
APInt Imm; |
| 189 |
if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift) |
189 |
if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift) |
| 190 |
Imm = APInt(VT.getScalarSizeInBits(), |
190 |
Imm = APInt(VT.getScalarSizeInBits(), |
| 191 |
Op.getOperand(1).getConstantOperandVal(0) |
191 |
Op.getOperand(1).getConstantOperandVal(0) |
| 192 |
<< Op.getOperand(1).getConstantOperandVal(1)); |
192 |
<< Op.getOperand(1).getConstantOperandVal(1)); |
| 193 |
else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP && |
193 |
else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP && |
| 194 |
isa(Op.getOperand(1).getOperand(0))) |
194 |
isa(Op.getOperand(1).getOperand(0))) |
| 195 |
Imm = APInt(VT.getScalarSizeInBits(), |
195 |
Imm = APInt(VT.getScalarSizeInBits(), |
| 196 |
Op.getOperand(1).getConstantOperandVal(0)); |
196 |
Op.getOperand(1).getConstantOperandVal(0)); |
| 197 |
else |
197 |
else |
| 198 |
return false; |
198 |
return false; |
| 199 |
|
199 |
|
| 200 |
if (Imm != 1ULL << (ShtAmt - 1)) |
200 |
if (Imm != 1ULL << (ShtAmt - 1)) |
| 201 |
return false; |
201 |
return false; |
| 202 |
|
202 |
|
| 203 |
Res1 = Op.getOperand(0); |
203 |
Res1 = Op.getOperand(0); |
| 204 |
Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32); |
204 |
Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32); |
| 205 |
return true; |
205 |
return true; |
| 206 |
} |
206 |
} |
| 207 |
|
207 |
|
| 208 |
bool SelectDupZeroOrUndef(SDValue N) { |
208 |
bool SelectDupZeroOrUndef(SDValue N) { |
| 209 |
switch(N->getOpcode()) { |
209 |
switch(N->getOpcode()) { |
| 210 |
case ISD::UNDEF: |
210 |
case ISD::UNDEF: |
| 211 |
return true; |
211 |
return true; |
| 212 |
case AArch64ISD::DUP: |
212 |
case AArch64ISD::DUP: |
| 213 |
case ISD::SPLAT_VECTOR: { |
213 |
case ISD::SPLAT_VECTOR: { |
| 214 |
auto Opnd0 = N->getOperand(0); |
214 |
auto Opnd0 = N->getOperand(0); |
| 215 |
if (isNullConstant(Opnd0)) |
215 |
if (isNullConstant(Opnd0)) |
| 216 |
return true; |
216 |
return true; |
| 217 |
if (isNullFPConstant(Opnd0)) |
217 |
if (isNullFPConstant(Opnd0)) |
| 218 |
return true; |
218 |
return true; |
| 219 |
break; |
219 |
break; |
| 220 |
} |
220 |
} |
| 221 |
default: |
221 |
default: |
| 222 |
break; |
222 |
break; |
| 223 |
} |
223 |
} |
| 224 |
|
224 |
|
| 225 |
return false; |
225 |
return false; |
| 226 |
} |
226 |
} |
| 227 |
|
227 |
|
| 228 |
bool SelectDupZero(SDValue N) { |
228 |
bool SelectDupZero(SDValue N) { |
| 229 |
switch(N->getOpcode()) { |
229 |
switch(N->getOpcode()) { |
| 230 |
case AArch64ISD::DUP: |
230 |
case AArch64ISD::DUP: |
| 231 |
case ISD::SPLAT_VECTOR: { |
231 |
case ISD::SPLAT_VECTOR: { |
| 232 |
auto Opnd0 = N->getOperand(0); |
232 |
auto Opnd0 = N->getOperand(0); |
| 233 |
if (isNullConstant(Opnd0)) |
233 |
if (isNullConstant(Opnd0)) |
| 234 |
return true; |
234 |
return true; |
| 235 |
if (isNullFPConstant(Opnd0)) |
235 |
if (isNullFPConstant(Opnd0)) |
| 236 |
return true; |
236 |
return true; |
| 237 |
break; |
237 |
break; |
| 238 |
} |
238 |
} |
| 239 |
} |
239 |
} |
| 240 |
|
240 |
|
| 241 |
return false; |
241 |
return false; |
| 242 |
} |
242 |
} |
| 243 |
|
243 |
|
| 244 |
bool SelectDupNegativeZero(SDValue N) { |
244 |
bool SelectDupNegativeZero(SDValue N) { |
| 245 |
switch(N->getOpcode()) { |
245 |
switch(N->getOpcode()) { |
| 246 |
case AArch64ISD::DUP: |
246 |
case AArch64ISD::DUP: |
| 247 |
case ISD::SPLAT_VECTOR: { |
247 |
case ISD::SPLAT_VECTOR: { |
| 248 |
ConstantFPSDNode *Const = dyn_cast(N->getOperand(0)); |
248 |
ConstantFPSDNode *Const = dyn_cast(N->getOperand(0)); |
| 249 |
return Const && Const->isZero() && Const->isNegative(); |
249 |
return Const && Const->isZero() && Const->isNegative(); |
| 250 |
} |
250 |
} |
| 251 |
} |
251 |
} |
| 252 |
|
252 |
|
| 253 |
return false; |
253 |
return false; |
| 254 |
} |
254 |
} |
| 255 |
|
255 |
|
| 256 |
template |
256 |
template |
| 257 |
bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) { |
257 |
bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) { |
| 258 |
return SelectSVEAddSubImm(N, VT, Imm, Shift); |
258 |
return SelectSVEAddSubImm(N, VT, Imm, Shift); |
| 259 |
} |
259 |
} |
| 260 |
|
260 |
|
| 261 |
template |
261 |
template |
| 262 |
bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) { |
262 |
bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) { |
| 263 |
return SelectSVECpyDupImm(N, VT, Imm, Shift); |
263 |
return SelectSVECpyDupImm(N, VT, Imm, Shift); |
| 264 |
} |
264 |
} |
| 265 |
|
265 |
|
| 266 |
template |
266 |
template |
| 267 |
bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { |
267 |
bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { |
| 268 |
return SelectSVELogicalImm(N, VT, Imm, Invert); |
268 |
return SelectSVELogicalImm(N, VT, Imm, Invert); |
| 269 |
} |
269 |
} |
| 270 |
|
270 |
|
| 271 |
template |
271 |
template |
| 272 |
bool SelectSVEArithImm(SDValue N, SDValue &Imm) { |
272 |
bool SelectSVEArithImm(SDValue N, SDValue &Imm) { |
| 273 |
return SelectSVEArithImm(N, VT, Imm); |
273 |
return SelectSVEArithImm(N, VT, Imm); |
| 274 |
} |
274 |
} |
| 275 |
|
275 |
|
| 276 |
template |
276 |
template |
| 277 |
bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { |
277 |
bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { |
| 278 |
return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); |
278 |
return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); |
| 279 |
} |
279 |
} |
| 280 |
|
280 |
|
| 281 |
bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) { |
281 |
bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) { |
| 282 |
if (N->getOpcode() != ISD::SPLAT_VECTOR) |
282 |
if (N->getOpcode() != ISD::SPLAT_VECTOR) |
| 283 |
return false; |
283 |
return false; |
| 284 |
|
284 |
|
| 285 |
EVT EltVT = N->getValueType(0).getVectorElementType(); |
285 |
EVT EltVT = N->getValueType(0).getVectorElementType(); |
| 286 |
return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1, |
286 |
return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1, |
| 287 |
/* High */ EltVT.getFixedSizeInBits(), |
287 |
/* High */ EltVT.getFixedSizeInBits(), |
| 288 |
/* AllowSaturation */ true, Imm); |
288 |
/* AllowSaturation */ true, Imm); |
| 289 |
} |
289 |
} |
| 290 |
|
290 |
|
| 291 |
// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. |
291 |
// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. |
| 292 |
template |
292 |
template |
| 293 |
bool SelectCntImm(SDValue N, SDValue &Imm) { |
293 |
bool SelectCntImm(SDValue N, SDValue &Imm) { |
| 294 |
if (!isa(N)) |
294 |
if (!isa(N)) |
| 295 |
return false; |
295 |
return false; |
| 296 |
|
296 |
|
| 297 |
int64_t MulImm = cast(N)->getSExtValue(); |
297 |
int64_t MulImm = cast(N)->getSExtValue(); |
| 298 |
if (Shift) |
298 |
if (Shift) |
| 299 |
MulImm = 1LL << MulImm; |
299 |
MulImm = 1LL << MulImm; |
| 300 |
|
300 |
|
| 301 |
if ((MulImm % std::abs(Scale)) != 0) |
301 |
if ((MulImm % std::abs(Scale)) != 0) |
| 302 |
return false; |
302 |
return false; |
| 303 |
|
303 |
|
| 304 |
MulImm /= Scale; |
304 |
MulImm /= Scale; |
| 305 |
if ((MulImm >= Min) && (MulImm <= Max)) { |
305 |
if ((MulImm >= Min) && (MulImm <= Max)) { |
| 306 |
Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); |
306 |
Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); |
| 307 |
return true; |
307 |
return true; |
| 308 |
} |
308 |
} |
| 309 |
|
309 |
|
| 310 |
return false; |
310 |
return false; |
| 311 |
} |
311 |
} |
| 312 |
|
312 |
|
| 313 |
template |
313 |
template |
| 314 |
bool SelectEXTImm(SDValue N, SDValue &Imm) { |
314 |
bool SelectEXTImm(SDValue N, SDValue &Imm) { |
| 315 |
if (!isa(N)) |
315 |
if (!isa(N)) |
| 316 |
return false; |
316 |
return false; |
| 317 |
|
317 |
|
| 318 |
int64_t MulImm = cast(N)->getSExtValue(); |
318 |
int64_t MulImm = cast(N)->getSExtValue(); |
| 319 |
|
319 |
|
| 320 |
if (MulImm >= 0 && MulImm <= Max) { |
320 |
if (MulImm >= 0 && MulImm <= Max) { |
| 321 |
MulImm *= Scale; |
321 |
MulImm *= Scale; |
| 322 |
Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); |
322 |
Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); |
| 323 |
return true; |
323 |
return true; |
| 324 |
} |
324 |
} |
| 325 |
|
325 |
|
| 326 |
return false; |
326 |
return false; |
| 327 |
} |
327 |
} |
| 328 |
|
328 |
|
| 329 |
template bool ImmToTile(SDValue N, SDValue &Imm) { |
329 |
template bool ImmToTile(SDValue N, SDValue &Imm) { |
| 330 |
if (auto *CI = dyn_cast(N)) { |
330 |
if (auto *CI = dyn_cast(N)) { |
| 331 |
uint64_t C = CI->getZExtValue(); |
331 |
uint64_t C = CI->getZExtValue(); |
| 332 |
Imm = CurDAG->getRegister(BaseReg + C, MVT::Other); |
332 |
Imm = CurDAG->getRegister(BaseReg + C, MVT::Other); |
| 333 |
return true; |
333 |
return true; |
| 334 |
} |
334 |
} |
| 335 |
return false; |
335 |
return false; |
| 336 |
} |
336 |
} |
| 337 |
|
337 |
|
| 338 |
/// Form sequences of consecutive 64/128-bit registers for use in NEON |
338 |
/// Form sequences of consecutive 64/128-bit registers for use in NEON |
| 339 |
/// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have |
339 |
/// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have |
| 340 |
/// between 1 and 4 elements. If it contains a single element that is returned |
340 |
/// between 1 and 4 elements. If it contains a single element that is returned |
| 341 |
/// unchanged; otherwise a REG_SEQUENCE value is returned. |
341 |
/// unchanged; otherwise a REG_SEQUENCE value is returned. |
| 342 |
SDValue createDTuple(ArrayRef Vecs); |
342 |
SDValue createDTuple(ArrayRef Vecs); |
| 343 |
SDValue createQTuple(ArrayRef Vecs); |
343 |
SDValue createQTuple(ArrayRef Vecs); |
| 344 |
// Form a sequence of SVE registers for instructions using list of vectors, |
344 |
// Form a sequence of SVE registers for instructions using list of vectors, |
| 345 |
// e.g. structured loads and stores (ldN, stN). |
345 |
// e.g. structured loads and stores (ldN, stN). |
| 346 |
SDValue createZTuple(ArrayRef Vecs); |
346 |
SDValue createZTuple(ArrayRef Vecs); |
| 347 |
|
347 |
|
| 348 |
// Similar to above, except the register must start at a multiple of the |
348 |
// Similar to above, except the register must start at a multiple of the |
| 349 |
// tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple. |
349 |
// tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple. |
| 350 |
SDValue createZMulTuple(ArrayRef Regs); |
350 |
SDValue createZMulTuple(ArrayRef Regs); |
| 351 |
|
351 |
|
| 352 |
/// Generic helper for the createDTuple/createQTuple |
352 |
/// Generic helper for the createDTuple/createQTuple |
| 353 |
/// functions. Those should almost always be called instead. |
353 |
/// functions. Those should almost always be called instead. |
| 354 |
SDValue createTuple(ArrayRef Vecs, const unsigned RegClassIDs[], |
354 |
SDValue createTuple(ArrayRef Vecs, const unsigned RegClassIDs[], |
| 355 |
const unsigned SubRegs[]); |
355 |
const unsigned SubRegs[]); |
| 356 |
|
356 |
|
| 357 |
void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); |
357 |
void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); |
| 358 |
|
358 |
|
| 359 |
bool tryIndexedLoad(SDNode *N); |
359 |
bool tryIndexedLoad(SDNode *N); |
| 360 |
|
360 |
|
| 361 |
bool trySelectStackSlotTagP(SDNode *N); |
361 |
bool trySelectStackSlotTagP(SDNode *N); |
| 362 |
void SelectTagP(SDNode *N); |
362 |
void SelectTagP(SDNode *N); |
| 363 |
|
363 |
|
| 364 |
void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, |
364 |
void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, |
| 365 |
unsigned SubRegIdx); |
365 |
unsigned SubRegIdx); |
| 366 |
void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, |
366 |
void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, |
| 367 |
unsigned SubRegIdx); |
367 |
unsigned SubRegIdx); |
| 368 |
void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
368 |
void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
| 369 |
void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
369 |
void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
| 370 |
void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, |
370 |
void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, |
| 371 |
unsigned Opc_rr, unsigned Opc_ri, |
371 |
unsigned Opc_rr, unsigned Opc_ri, |
| 372 |
bool IsIntr = false); |
372 |
bool IsIntr = false); |
| 373 |
void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs, |
373 |
void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs, |
| 374 |
unsigned Scale, unsigned Opc_ri, |
374 |
unsigned Scale, unsigned Opc_ri, |
| 375 |
unsigned Opc_rr); |
375 |
unsigned Opc_rr); |
| 376 |
void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs, |
376 |
void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs, |
| 377 |
bool IsZmMulti, unsigned Opcode, |
377 |
bool IsZmMulti, unsigned Opcode, |
| 378 |
bool HasPred = false); |
378 |
bool HasPred = false); |
| 379 |
void SelectPExtPair(SDNode *N, unsigned Opc); |
379 |
void SelectPExtPair(SDNode *N, unsigned Opc); |
| 380 |
void SelectWhilePair(SDNode *N, unsigned Opc); |
380 |
void SelectWhilePair(SDNode *N, unsigned Opc); |
| 381 |
void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode); |
381 |
void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode); |
| 382 |
void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode); |
382 |
void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode); |
| 383 |
void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs, |
383 |
void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs, |
| 384 |
bool IsTupleInput, unsigned Opc); |
384 |
bool IsTupleInput, unsigned Opc); |
| 385 |
void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode); |
385 |
void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode); |
| 386 |
|
386 |
|
| 387 |
template |
387 |
template |
| 388 |
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg, |
388 |
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg, |
| 389 |
unsigned Op); |
389 |
unsigned Op); |
| 390 |
|
390 |
|
| 391 |
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); |
391 |
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); |
| 392 |
/// SVE Reg+Imm addressing mode. |
392 |
/// SVE Reg+Imm addressing mode. |
| 393 |
template |
393 |
template |
| 394 |
bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base, |
394 |
bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base, |
| 395 |
SDValue &OffImm); |
395 |
SDValue &OffImm); |
| 396 |
/// SVE Reg+Reg address mode. |
396 |
/// SVE Reg+Reg address mode. |
| 397 |
template |
397 |
template |
| 398 |
bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) { |
398 |
bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) { |
| 399 |
return SelectSVERegRegAddrMode(N, Scale, Base, Offset); |
399 |
return SelectSVERegRegAddrMode(N, Scale, Base, Offset); |
| 400 |
} |
400 |
} |
| 401 |
|
401 |
|
| 402 |
template |
402 |
template |
| 403 |
bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { |
403 |
bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { |
| 404 |
return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale); |
404 |
return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale); |
| 405 |
} |
405 |
} |
| 406 |
|
406 |
|
| 407 |
void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); |
407 |
void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); |
| 408 |
void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); |
408 |
void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); |
| 409 |
void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
409 |
void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
| 410 |
void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
410 |
void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); |
| 411 |
void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale, |
411 |
void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale, |
| 412 |
unsigned Opc_rr, unsigned Opc_ri); |
412 |
unsigned Opc_rr, unsigned Opc_ri); |
| 413 |
std::tuple |
413 |
std::tuple |
| 414 |
findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri, |
414 |
findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri, |
| 415 |
const SDValue &OldBase, const SDValue &OldOffset, |
415 |
const SDValue &OldBase, const SDValue &OldOffset, |
| 416 |
unsigned Scale); |
416 |
unsigned Scale); |
| 417 |
|
417 |
|
| 418 |
bool tryBitfieldExtractOp(SDNode *N); |
418 |
bool tryBitfieldExtractOp(SDNode *N); |
| 419 |
bool tryBitfieldExtractOpFromSExt(SDNode *N); |
419 |
bool tryBitfieldExtractOpFromSExt(SDNode *N); |
| 420 |
bool tryBitfieldInsertOp(SDNode *N); |
420 |
bool tryBitfieldInsertOp(SDNode *N); |
| 421 |
bool tryBitfieldInsertInZeroOp(SDNode *N); |
421 |
bool tryBitfieldInsertInZeroOp(SDNode *N); |
| 422 |
bool tryShiftAmountMod(SDNode *N); |
422 |
bool tryShiftAmountMod(SDNode *N); |
| 423 |
|
423 |
|
| 424 |
bool tryReadRegister(SDNode *N); |
424 |
bool tryReadRegister(SDNode *N); |
| 425 |
bool tryWriteRegister(SDNode *N); |
425 |
bool tryWriteRegister(SDNode *N); |
| 426 |
|
426 |
|
| 427 |
bool trySelectCastFixedLengthToScalableVector(SDNode *N); |
427 |
bool trySelectCastFixedLengthToScalableVector(SDNode *N); |
| 428 |
bool trySelectCastScalableToFixedLengthVector(SDNode *N); |
428 |
bool trySelectCastScalableToFixedLengthVector(SDNode *N); |
| 429 |
|
429 |
|
| 430 |
// Include the pieces autogenerated from the target description. |
430 |
// Include the pieces autogenerated from the target description. |
| 431 |
#include "AArch64GenDAGISel.inc" |
431 |
#include "AArch64GenDAGISel.inc" |
| 432 |
|
432 |
|
| 433 |
private: |
433 |
private: |
| 434 |
bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, |
434 |
bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, |
| 435 |
SDValue &Shift); |
435 |
SDValue &Shift); |
| 436 |
bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift); |
436 |
bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift); |
| 437 |
bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, |
437 |
bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, |
| 438 |
SDValue &OffImm) { |
438 |
SDValue &OffImm) { |
| 439 |
return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm); |
439 |
return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm); |
| 440 |
} |
440 |
} |
| 441 |
bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, |
441 |
bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, |
| 442 |
unsigned Size, SDValue &Base, |
442 |
unsigned Size, SDValue &Base, |
| 443 |
SDValue &OffImm); |
443 |
SDValue &OffImm); |
| 444 |
bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, |
444 |
bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, |
| 445 |
SDValue &OffImm); |
445 |
SDValue &OffImm); |
| 446 |
bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, |
446 |
bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, |
| 447 |
SDValue &OffImm); |
447 |
SDValue &OffImm); |
| 448 |
bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, |
448 |
bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, |
| 449 |
SDValue &Offset, SDValue &SignExtend, |
449 |
SDValue &Offset, SDValue &SignExtend, |
| 450 |
SDValue &DoShift); |
450 |
SDValue &DoShift); |
| 451 |
bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, |
451 |
bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, |
| 452 |
SDValue &Offset, SDValue &SignExtend, |
452 |
SDValue &Offset, SDValue &SignExtend, |
| 453 |
SDValue &DoShift); |
453 |
SDValue &DoShift); |
| 454 |
bool isWorthFolding(SDValue V) const; |
454 |
bool isWorthFolding(SDValue V) const; |
| 455 |
bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, |
455 |
bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, |
| 456 |
SDValue &Offset, SDValue &SignExtend); |
456 |
SDValue &Offset, SDValue &SignExtend); |
| 457 |
|
457 |
|
| 458 |
template |
458 |
template |
| 459 |
bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { |
459 |
bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { |
| 460 |
return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); |
460 |
return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); |
| 461 |
} |
461 |
} |
| 462 |
|
462 |
|
| 463 |
bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); |
463 |
bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); |
| 464 |
|
464 |
|
| 465 |
bool SelectCMP_SWAP(SDNode *N); |
465 |
bool SelectCMP_SWAP(SDNode *N); |
| 466 |
|
466 |
|
| 467 |
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); |
467 |
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); |
| 468 |
bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); |
468 |
bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); |
| 469 |
bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert); |
469 |
bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert); |
| 470 |
|
470 |
|
| 471 |
bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); |
471 |
bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); |
| 472 |
bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, |
472 |
bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, |
| 473 |
bool AllowSaturation, SDValue &Imm); |
473 |
bool AllowSaturation, SDValue &Imm); |
| 474 |
|
474 |
|
| 475 |
bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); |
475 |
bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); |
| 476 |
bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, |
476 |
bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, |
| 477 |
SDValue &Offset); |
477 |
SDValue &Offset); |
| 478 |
bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector, |
478 |
bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector, |
| 479 |
SDValue &Offset, unsigned Scale = 1); |
479 |
SDValue &Offset, unsigned Scale = 1); |
| 480 |
|
480 |
|
| 481 |
bool SelectAllActivePredicate(SDValue N); |
481 |
bool SelectAllActivePredicate(SDValue N); |
| 482 |
bool SelectAnyPredicate(SDValue N); |
482 |
bool SelectAnyPredicate(SDValue N); |
| 483 |
}; |
483 |
}; |
| 484 |
} // end anonymous namespace |
484 |
} // end anonymous namespace |
| 485 |
|
485 |
|
| 486 |
char AArch64DAGToDAGISel::ID = 0; |
486 |
char AArch64DAGToDAGISel::ID = 0; |
| 487 |
|
487 |
|
| 488 |
INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) |
488 |
INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) |
| 489 |
|
489 |
|
| 490 |
/// isIntImmediate - This method tests to see if the node is a constant |
490 |
/// isIntImmediate - This method tests to see if the node is a constant |
| 491 |
/// operand. If so Imm will receive the 32-bit value. |
491 |
/// operand. If so Imm will receive the 32-bit value. |
| 492 |
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { |
492 |
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { |
| 493 |
if (const ConstantSDNode *C = dyn_cast(N)) { |
493 |
if (const ConstantSDNode *C = dyn_cast(N)) { |
| 494 |
Imm = C->getZExtValue(); |
494 |
Imm = C->getZExtValue(); |
| 495 |
return true; |
495 |
return true; |
| 496 |
} |
496 |
} |
| 497 |
return false; |
497 |
return false; |
| 498 |
} |
498 |
} |
| 499 |
|
499 |
|
| 500 |
// isIntImmediate - This method tests to see if a constant operand. |
500 |
// isIntImmediate - This method tests to see if a constant operand. |
| 501 |
// If so Imm will receive the value. |
501 |
// If so Imm will receive the value. |
| 502 |
static bool isIntImmediate(SDValue N, uint64_t &Imm) { |
502 |
static bool isIntImmediate(SDValue N, uint64_t &Imm) { |
| 503 |
return isIntImmediate(N.getNode(), Imm); |
503 |
return isIntImmediate(N.getNode(), Imm); |
| 504 |
} |
504 |
} |
| 505 |
|
505 |
|
| 506 |
// isOpcWithIntImmediate - This method tests to see if the node is a specific |
506 |
// isOpcWithIntImmediate - This method tests to see if the node is a specific |
| 507 |
// opcode and that it has a immediate integer right operand. |
507 |
// opcode and that it has a immediate integer right operand. |
| 508 |
// If so Imm will receive the 32 bit value. |
508 |
// If so Imm will receive the 32 bit value. |
| 509 |
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, |
509 |
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, |
| 510 |
uint64_t &Imm) { |
510 |
uint64_t &Imm) { |
| 511 |
return N->getOpcode() == Opc && |
511 |
return N->getOpcode() == Opc && |
| 512 |
isIntImmediate(N->getOperand(1).getNode(), Imm); |
512 |
isIntImmediate(N->getOperand(1).getNode(), Imm); |
| 513 |
} |
513 |
} |
| 514 |
|
514 |
|
| 515 |
// isIntImmediateEq - This method tests to see if N is a constant operand that |
515 |
// isIntImmediateEq - This method tests to see if N is a constant operand that |
| 516 |
// is equivalent to 'ImmExpected'. |
516 |
// is equivalent to 'ImmExpected'. |
| 517 |
#ifndef NDEBUG |
517 |
#ifndef NDEBUG |
| 518 |
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) { |
518 |
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) { |
| 519 |
uint64_t Imm; |
519 |
uint64_t Imm; |
| 520 |
if (!isIntImmediate(N.getNode(), Imm)) |
520 |
if (!isIntImmediate(N.getNode(), Imm)) |
| 521 |
return false; |
521 |
return false; |
| 522 |
return Imm == ImmExpected; |
522 |
return Imm == ImmExpected; |
| 523 |
} |
523 |
} |
| 524 |
#endif |
524 |
#endif |
| 525 |
|
525 |
|
| 526 |
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( |
526 |
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( |
| 527 |
const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { |
527 |
const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { |
| 528 |
switch(ConstraintID) { |
528 |
switch(ConstraintID) { |
| 529 |
default: |
529 |
default: |
| 530 |
llvm_unreachable("Unexpected asm memory constraint"); |
530 |
llvm_unreachable("Unexpected asm memory constraint"); |
| 531 |
case InlineAsm::Constraint_m: |
531 |
case InlineAsm::Constraint_m: |
| 532 |
case InlineAsm::Constraint_o: |
532 |
case InlineAsm::Constraint_o: |
| 533 |
case InlineAsm::Constraint_Q: |
533 |
case InlineAsm::Constraint_Q: |
| 534 |
// We need to make sure that this one operand does not end up in XZR, thus |
534 |
// We need to make sure that this one operand does not end up in XZR, thus |
| 535 |
// require the address to be in a PointerRegClass register. |
535 |
// require the address to be in a PointerRegClass register. |
| 536 |
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); |
536 |
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); |
| 537 |
const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); |
537 |
const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); |
| 538 |
SDLoc dl(Op); |
538 |
SDLoc dl(Op); |
| 539 |
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); |
539 |
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); |
| 540 |
SDValue NewOp = |
540 |
SDValue NewOp = |
| 541 |
SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, |
541 |
SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, |
| 542 |
dl, Op.getValueType(), |
542 |
dl, Op.getValueType(), |
| 543 |
Op, RC), 0); |
543 |
Op, RC), 0); |
| 544 |
OutOps.push_back(NewOp); |
544 |
OutOps.push_back(NewOp); |
| 545 |
return false; |
545 |
return false; |
| 546 |
} |
546 |
} |
| 547 |
return true; |
547 |
return true; |
| 548 |
} |
548 |
} |
| 549 |
|
549 |
|
| 550 |
/// SelectArithImmed - Select an immediate value that can be represented as |
550 |
/// SelectArithImmed - Select an immediate value that can be represented as |
| 551 |
/// a 12-bit value shifted left by either 0 or 12. If so, return true with |
551 |
/// a 12-bit value shifted left by either 0 or 12. If so, return true with |
| 552 |
/// Val set to the 12-bit value and Shift set to the shifter operand. |
552 |
/// Val set to the 12-bit value and Shift set to the shifter operand. |
| 553 |
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, |
553 |
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, |
| 554 |
SDValue &Shift) { |
554 |
SDValue &Shift) { |
| 555 |
// This function is called from the addsub_shifted_imm ComplexPattern, |
555 |
// This function is called from the addsub_shifted_imm ComplexPattern, |
| 556 |
// which lists [imm] as the list of opcode it's interested in, however |
556 |
// which lists [imm] as the list of opcode it's interested in, however |
| 557 |
// we still need to check whether the operand is actually an immediate |
557 |
// we still need to check whether the operand is actually an immediate |
| 558 |
// here because the ComplexPattern opcode list is only used in |
558 |
// here because the ComplexPattern opcode list is only used in |
| 559 |
// root-level opcode matching. |
559 |
// root-level opcode matching. |
| 560 |
if (!isa(N.getNode())) |
560 |
if (!isa(N.getNode())) |
| 561 |
return false; |
561 |
return false; |
| 562 |
|
562 |
|
| 563 |
uint64_t Immed = cast(N.getNode())->getZExtValue(); |
563 |
uint64_t Immed = cast(N.getNode())->getZExtValue(); |
| 564 |
unsigned ShiftAmt; |
564 |
unsigned ShiftAmt; |
| 565 |
|
565 |
|
| 566 |
if (Immed >> 12 == 0) { |
566 |
if (Immed >> 12 == 0) { |
| 567 |
ShiftAmt = 0; |
567 |
ShiftAmt = 0; |
| 568 |
} else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { |
568 |
} else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { |
| 569 |
ShiftAmt = 12; |
569 |
ShiftAmt = 12; |
| 570 |
Immed = Immed >> 12; |
570 |
Immed = Immed >> 12; |
| 571 |
} else |
571 |
} else |
| 572 |
return false; |
572 |
return false; |
| 573 |
|
573 |
|
| 574 |
unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); |
574 |
unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); |
| 575 |
SDLoc dl(N); |
575 |
SDLoc dl(N); |
| 576 |
Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); |
576 |
Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); |
| 577 |
Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); |
577 |
Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); |
| 578 |
return true; |
578 |
return true; |
| 579 |
} |
579 |
} |
| 580 |
|
580 |
|
| 581 |
/// SelectNegArithImmed - As above, but negates the value before trying to |
581 |
/// SelectNegArithImmed - As above, but negates the value before trying to |
| 582 |
/// select it. |
582 |
/// select it. |
| 583 |
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, |
583 |
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, |
| 584 |
SDValue &Shift) { |
584 |
SDValue &Shift) { |
| 585 |
// This function is called from the addsub_shifted_imm ComplexPattern, |
585 |
// This function is called from the addsub_shifted_imm ComplexPattern, |
| 586 |
// which lists [imm] as the list of opcode it's interested in, however |
586 |
// which lists [imm] as the list of opcode it's interested in, however |
| 587 |
// we still need to check whether the operand is actually an immediate |
587 |
// we still need to check whether the operand is actually an immediate |
| 588 |
// here because the ComplexPattern opcode list is only used in |
588 |
// here because the ComplexPattern opcode list is only used in |
| 589 |
// root-level opcode matching. |
589 |
// root-level opcode matching. |
| 590 |
if (!isa(N.getNode())) |
590 |
if (!isa(N.getNode())) |
| 591 |
return false; |
591 |
return false; |
| 592 |
|
592 |
|
| 593 |
// The immediate operand must be a 24-bit zero-extended immediate. |
593 |
// The immediate operand must be a 24-bit zero-extended immediate. |
| 594 |
uint64_t Immed = cast(N.getNode())->getZExtValue(); |
594 |
uint64_t Immed = cast(N.getNode())->getZExtValue(); |
| 595 |
|
595 |
|
| 596 |
// This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" |
596 |
// This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" |
| 597 |
// have the opposite effect on the C flag, so this pattern mustn't match under |
597 |
// have the opposite effect on the C flag, so this pattern mustn't match under |
| 598 |
// those circumstances. |
598 |
// those circumstances. |
| 599 |
if (Immed == 0) |
599 |
if (Immed == 0) |
| 600 |
return false; |
600 |
return false; |
| 601 |
|
601 |
|
| 602 |
if (N.getValueType() == MVT::i32) |
602 |
if (N.getValueType() == MVT::i32) |
| 603 |
Immed = ~((uint32_t)Immed) + 1; |
603 |
Immed = ~((uint32_t)Immed) + 1; |
| 604 |
else |
604 |
else |
| 605 |
Immed = ~Immed + 1ULL; |
605 |
Immed = ~Immed + 1ULL; |
| 606 |
if (Immed & 0xFFFFFFFFFF000000ULL) |
606 |
if (Immed & 0xFFFFFFFFFF000000ULL) |
| 607 |
return false; |
607 |
return false; |
| 608 |
|
608 |
|
| 609 |
Immed &= 0xFFFFFFULL; |
609 |
Immed &= 0xFFFFFFULL; |
| 610 |
return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, |
610 |
return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, |
| 611 |
Shift); |
611 |
Shift); |
| 612 |
} |
612 |
} |
| 613 |
|
613 |
|
| 614 |
/// getShiftTypeForNode - Translate a shift node to the corresponding |
614 |
/// getShiftTypeForNode - Translate a shift node to the corresponding |
| 615 |
/// ShiftType value. |
615 |
/// ShiftType value. |
| 616 |
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { |
616 |
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { |
| 617 |
switch (N.getOpcode()) { |
617 |
switch (N.getOpcode()) { |
| 618 |
default: |
618 |
default: |
| 619 |
return AArch64_AM::InvalidShiftExtend; |
619 |
return AArch64_AM::InvalidShiftExtend; |
| 620 |
case ISD::SHL: |
620 |
case ISD::SHL: |
| 621 |
return AArch64_AM::LSL; |
621 |
return AArch64_AM::LSL; |
| 622 |
case ISD::SRL: |
622 |
case ISD::SRL: |
| 623 |
return AArch64_AM::LSR; |
623 |
return AArch64_AM::LSR; |
| 624 |
case ISD::SRA: |
624 |
case ISD::SRA: |
| 625 |
return AArch64_AM::ASR; |
625 |
return AArch64_AM::ASR; |
| 626 |
case ISD::ROTR: |
626 |
case ISD::ROTR: |
| 627 |
return AArch64_AM::ROR; |
627 |
return AArch64_AM::ROR; |
| 628 |
} |
628 |
} |
| 629 |
} |
629 |
} |
| 630 |
|
630 |
|
| 631 |
/// Determine whether it is worth it to fold SHL into the addressing |
631 |
/// Determine whether it is worth it to fold SHL into the addressing |
| 632 |
/// mode. |
632 |
/// mode. |
| 633 |
static bool isWorthFoldingSHL(SDValue V) { |
633 |
static bool isWorthFoldingSHL(SDValue V) { |
| 634 |
assert(V.getOpcode() == ISD::SHL && "invalid opcode"); |
634 |
assert(V.getOpcode() == ISD::SHL && "invalid opcode"); |
| 635 |
// It is worth folding logical shift of up to three places. |
635 |
// It is worth folding logical shift of up to three places. |
| 636 |
auto *CSD = dyn_cast(V.getOperand(1)); |
636 |
auto *CSD = dyn_cast(V.getOperand(1)); |
| 637 |
if (!CSD) |
637 |
if (!CSD) |
| 638 |
return false; |
638 |
return false; |
| 639 |
unsigned ShiftVal = CSD->getZExtValue(); |
639 |
unsigned ShiftVal = CSD->getZExtValue(); |
| 640 |
if (ShiftVal > 3) |
640 |
if (ShiftVal > 3) |
| 641 |
return false; |
641 |
return false; |
| 642 |
|
642 |
|
| 643 |
// Check if this particular node is reused in any non-memory related |
643 |
// Check if this particular node is reused in any non-memory related |
| 644 |
// operation. If yes, do not try to fold this node into the address |
644 |
// operation. If yes, do not try to fold this node into the address |
| 645 |
// computation, since the computation will be kept. |
645 |
// computation, since the computation will be kept. |
| 646 |
const SDNode *Node = V.getNode(); |
646 |
const SDNode *Node = V.getNode(); |
| 647 |
for (SDNode *UI : Node->uses()) |
647 |
for (SDNode *UI : Node->uses()) |
| 648 |
if (!isa(*UI)) |
648 |
if (!isa(*UI)) |
| 649 |
for (SDNode *UII : UI->uses()) |
649 |
for (SDNode *UII : UI->uses()) |
| 650 |
if (!isa(*UII)) |
650 |
if (!isa(*UII)) |
| 651 |
return false; |
651 |
return false; |
| 652 |
return true; |
652 |
return true; |
| 653 |
} |
653 |
} |
| 654 |
|
654 |
|
| 655 |
/// Determine whether it is worth to fold V into an extended register. |
655 |
/// Determine whether it is worth to fold V into an extended register. |
| 656 |
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { |
656 |
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { |
| 657 |
// Trivial if we are optimizing for code size or if there is only |
657 |
// Trivial if we are optimizing for code size or if there is only |
| 658 |
// one use of the value. |
658 |
// one use of the value. |
| 659 |
if (CurDAG->shouldOptForSize() || V.hasOneUse()) |
659 |
if (CurDAG->shouldOptForSize() || V.hasOneUse()) |
| 660 |
return true; |
660 |
return true; |
| 661 |
// If a subtarget has a fastpath LSL we can fold a logical shift into |
661 |
// If a subtarget has a fastpath LSL we can fold a logical shift into |
| 662 |
// the addressing mode and save a cycle. |
662 |
// the addressing mode and save a cycle. |
| 663 |
if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && |
663 |
if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && |
| 664 |
isWorthFoldingSHL(V)) |
664 |
isWorthFoldingSHL(V)) |
| 665 |
return true; |
665 |
return true; |
| 666 |
if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { |
666 |
if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { |
| 667 |
const SDValue LHS = V.getOperand(0); |
667 |
const SDValue LHS = V.getOperand(0); |
| 668 |
const SDValue RHS = V.getOperand(1); |
668 |
const SDValue RHS = V.getOperand(1); |
| 669 |
if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) |
669 |
if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) |
| 670 |
return true; |
670 |
return true; |
| 671 |
if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) |
671 |
if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) |
| 672 |
return true; |
672 |
return true; |
| 673 |
} |
673 |
} |
| 674 |
|
674 |
|
| 675 |
// It hurts otherwise, since the value will be reused. |
675 |
// It hurts otherwise, since the value will be reused. |
| 676 |
return false; |
676 |
return false; |
| 677 |
} |
677 |
} |
| 678 |
|
678 |
|
| 679 |
/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2 |
679 |
/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2 |
| 680 |
/// to select more shifted register |
680 |
/// to select more shifted register |
| 681 |
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, |
681 |
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, |
| 682 |
SDValue &Shift) { |
682 |
SDValue &Shift) { |
| 683 |
EVT VT = N.getValueType(); |
683 |
EVT VT = N.getValueType(); |
| 684 |
if (VT != MVT::i32 && VT != MVT::i64) |
684 |
if (VT != MVT::i32 && VT != MVT::i64) |
| 685 |
return false; |
685 |
return false; |
| 686 |
|
686 |
|
| 687 |
if (N->getOpcode() != ISD::AND || !N->hasOneUse()) |
687 |
if (N->getOpcode() != ISD::AND || !N->hasOneUse()) |
| 688 |
return false; |
688 |
return false; |
| 689 |
SDValue LHS = N.getOperand(0); |
689 |
SDValue LHS = N.getOperand(0); |
| 690 |
if (!LHS->hasOneUse()) |
690 |
if (!LHS->hasOneUse()) |
| 691 |
return false; |
691 |
return false; |
| 692 |
|
692 |
|
| 693 |
unsigned LHSOpcode = LHS->getOpcode(); |
693 |
unsigned LHSOpcode = LHS->getOpcode(); |
| 694 |
if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA) |
694 |
if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA) |
| 695 |
return false; |
695 |
return false; |
| 696 |
|
696 |
|
| 697 |
ConstantSDNode *ShiftAmtNode = dyn_cast(LHS.getOperand(1)); |
697 |
ConstantSDNode *ShiftAmtNode = dyn_cast(LHS.getOperand(1)); |
| 698 |
if (!ShiftAmtNode) |
698 |
if (!ShiftAmtNode) |
| 699 |
return false; |
699 |
return false; |
| 700 |
|
700 |
|
| 701 |
uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue(); |
701 |
uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue(); |
| 702 |
ConstantSDNode *RHSC = dyn_cast(N.getOperand(1)); |
702 |
ConstantSDNode *RHSC = dyn_cast(N.getOperand(1)); |
| 703 |
if (!RHSC) |
703 |
if (!RHSC) |
| 704 |
return false; |
704 |
return false; |
| 705 |
|
705 |
|
| 706 |
APInt AndMask = RHSC->getAPIntValue(); |
706 |
APInt AndMask = RHSC->getAPIntValue(); |
| 707 |
unsigned LowZBits, MaskLen; |
707 |
unsigned LowZBits, MaskLen; |
| 708 |
if (!AndMask.isShiftedMask(LowZBits, MaskLen)) |
708 |
if (!AndMask.isShiftedMask(LowZBits, MaskLen)) |
| 709 |
return false; |
709 |
return false; |
| 710 |
|
710 |
|
| 711 |
unsigned BitWidth = N.getValueSizeInBits(); |
711 |
unsigned BitWidth = N.getValueSizeInBits(); |
| 712 |
SDLoc DL(LHS); |
712 |
SDLoc DL(LHS); |
| 713 |
uint64_t NewShiftC; |
713 |
uint64_t NewShiftC; |
| 714 |
unsigned NewShiftOp; |
714 |
unsigned NewShiftOp; |
| 715 |
if (LHSOpcode == ISD::SHL) { |
715 |
if (LHSOpcode == ISD::SHL) { |
| 716 |
// LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp |
716 |
// LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp |
| 717 |
// BitWidth != LowZBits + MaskLen doesn't match the pattern |
717 |
// BitWidth != LowZBits + MaskLen doesn't match the pattern |
| 718 |
if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen)) |
718 |
if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen)) |
| 719 |
return false; |
719 |
return false; |
| 720 |
|
720 |
|
| 721 |
NewShiftC = LowZBits - ShiftAmtC; |
721 |
NewShiftC = LowZBits - ShiftAmtC; |
| 722 |
NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri; |
722 |
NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri; |
| 723 |
} else { |
723 |
} else { |
| 724 |
if (LowZBits == 0) |
724 |
if (LowZBits == 0) |
| 725 |
return false; |
725 |
return false; |
| 726 |
|
726 |
|
| 727 |
// NewShiftC >= BitWidth will fall into isBitfieldExtractOp |
727 |
// NewShiftC >= BitWidth will fall into isBitfieldExtractOp |
| 728 |
NewShiftC = LowZBits + ShiftAmtC; |
728 |
NewShiftC = LowZBits + ShiftAmtC; |
| 729 |
if (NewShiftC >= BitWidth) |
729 |
if (NewShiftC >= BitWidth) |
| 730 |
return false; |
730 |
return false; |
| 731 |
|
731 |
|
| 732 |
// SRA need all high bits |
732 |
// SRA need all high bits |
| 733 |
if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen))) |
733 |
if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen))) |
| 734 |
return false; |
734 |
return false; |
| 735 |
|
735 |
|
| 736 |
// SRL high bits can be 0 or 1 |
736 |
// SRL high bits can be 0 or 1 |
| 737 |
if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen))) |
737 |
if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen))) |
| 738 |
return false; |
738 |
return false; |
| 739 |
|
739 |
|
| 740 |
if (LHSOpcode == ISD::SRL) |
740 |
if (LHSOpcode == ISD::SRL) |
| 741 |
NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri; |
741 |
NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri; |
| 742 |
else |
742 |
else |
| 743 |
NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri; |
743 |
NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri; |
| 744 |
} |
744 |
} |
| 745 |
|
745 |
|
| 746 |
assert(NewShiftC < BitWidth && "Invalid shift amount"); |
746 |
assert(NewShiftC < BitWidth && "Invalid shift amount"); |
| 747 |
SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT); |
747 |
SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT); |
| 748 |
SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT); |
748 |
SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT); |
| 749 |
Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0), |
749 |
Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0), |
| 750 |
NewShiftAmt, BitWidthMinus1), |
750 |
NewShiftAmt, BitWidthMinus1), |
| 751 |
0); |
751 |
0); |
| 752 |
unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits); |
752 |
unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits); |
| 753 |
Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32); |
753 |
Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32); |
| 754 |
return true; |
754 |
return true; |
| 755 |
} |
755 |
} |
| 756 |
|
756 |
|
| 757 |
/// SelectShiftedRegister - Select a "shifted register" operand. If the value |
757 |
/// SelectShiftedRegister - Select a "shifted register" operand. If the value |
| 758 |
/// is not shifted, set the Shift operand to default of "LSL 0". The logical |
758 |
/// is not shifted, set the Shift operand to default of "LSL 0". The logical |
| 759 |
/// instructions allow the shifted register to be rotated, but the arithmetic |
759 |
/// instructions allow the shifted register to be rotated, but the arithmetic |
| 760 |
/// instructions do not. The AllowROR parameter specifies whether ROR is |
760 |
/// instructions do not. The AllowROR parameter specifies whether ROR is |
| 761 |
/// supported. |
761 |
/// supported. |
| 762 |
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, |
762 |
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, |
| 763 |
SDValue &Reg, SDValue &Shift) { |
763 |
SDValue &Reg, SDValue &Shift) { |
| 764 |
if (SelectShiftedRegisterFromAnd(N, Reg, Shift)) |
764 |
if (SelectShiftedRegisterFromAnd(N, Reg, Shift)) |
| 765 |
return true; |
765 |
return true; |
| 766 |
|
766 |
|
| 767 |
AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); |
767 |
AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); |
| 768 |
if (ShType == AArch64_AM::InvalidShiftExtend) |
768 |
if (ShType == AArch64_AM::InvalidShiftExtend) |
| 769 |
return false; |
769 |
return false; |
| 770 |
if (!AllowROR && ShType == AArch64_AM::ROR) |
770 |
if (!AllowROR && ShType == AArch64_AM::ROR) |
| 771 |
return false; |
771 |
return false; |
| 772 |
|
772 |
|
| 773 |
if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { |
773 |
if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { |
| 774 |
unsigned BitSize = N.getValueSizeInBits(); |
774 |
unsigned BitSize = N.getValueSizeInBits(); |
| 775 |
unsigned Val = RHS->getZExtValue() & (BitSize - 1); |
775 |
unsigned Val = RHS->getZExtValue() & (BitSize - 1); |
| 776 |
unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); |
776 |
unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); |
| 777 |
|
777 |
|
| 778 |
Reg = N.getOperand(0); |
778 |
Reg = N.getOperand(0); |
| 779 |
Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); |
779 |
Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); |
| 780 |
return isWorthFolding(N); |
780 |
return isWorthFolding(N); |
| 781 |
} |
781 |
} |
| 782 |
|
782 |
|
| 783 |
return false; |
783 |
return false; |
| 784 |
} |
784 |
} |
| 785 |
|
785 |
|
| 786 |
/// getExtendTypeForNode - Translate an extend node to the corresponding |
786 |
/// getExtendTypeForNode - Translate an extend node to the corresponding |
| 787 |
/// ExtendType value. |
787 |
/// ExtendType value. |
| 788 |
static AArch64_AM::ShiftExtendType |
788 |
static AArch64_AM::ShiftExtendType |
| 789 |
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { |
789 |
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { |
| 790 |
if (N.getOpcode() == ISD::SIGN_EXTEND || |
790 |
if (N.getOpcode() == ISD::SIGN_EXTEND || |
| 791 |
N.getOpcode() == ISD::SIGN_EXTEND_INREG) { |
791 |
N.getOpcode() == ISD::SIGN_EXTEND_INREG) { |
| 792 |
EVT SrcVT; |
792 |
EVT SrcVT; |
| 793 |
if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) |
793 |
if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) |
| 794 |
SrcVT = cast(N.getOperand(1))->getVT(); |
794 |
SrcVT = cast(N.getOperand(1))->getVT(); |
| 795 |
else |
795 |
else |
| 796 |
SrcVT = N.getOperand(0).getValueType(); |
796 |
SrcVT = N.getOperand(0).getValueType(); |
| 797 |
|
797 |
|
| 798 |
if (!IsLoadStore && SrcVT == MVT::i8) |
798 |
if (!IsLoadStore && SrcVT == MVT::i8) |
| 799 |
return AArch64_AM::SXTB; |
799 |
return AArch64_AM::SXTB; |
| 800 |
else if (!IsLoadStore && SrcVT == MVT::i16) |
800 |
else if (!IsLoadStore && SrcVT == MVT::i16) |
| 801 |
return AArch64_AM::SXTH; |
801 |
return AArch64_AM::SXTH; |
| 802 |
else if (SrcVT == MVT::i32) |
802 |
else if (SrcVT == MVT::i32) |
| 803 |
return AArch64_AM::SXTW; |
803 |
return AArch64_AM::SXTW; |
| 804 |
assert(SrcVT != MVT::i64 && "extend from 64-bits?"); |
804 |
assert(SrcVT != MVT::i64 && "extend from 64-bits?"); |
| 805 |
|
805 |
|
| 806 |
return AArch64_AM::InvalidShiftExtend; |
806 |
return AArch64_AM::InvalidShiftExtend; |
| 807 |
} else if (N.getOpcode() == ISD::ZERO_EXTEND || |
807 |
} else if (N.getOpcode() == ISD::ZERO_EXTEND || |
| 808 |
N.getOpcode() == ISD::ANY_EXTEND) { |
808 |
N.getOpcode() == ISD::ANY_EXTEND) { |
| 809 |
EVT SrcVT = N.getOperand(0).getValueType(); |
809 |
EVT SrcVT = N.getOperand(0).getValueType(); |
| 810 |
if (!IsLoadStore && SrcVT == MVT::i8) |
810 |
if (!IsLoadStore && SrcVT == MVT::i8) |
| 811 |
return AArch64_AM::UXTB; |
811 |
return AArch64_AM::UXTB; |
| 812 |
else if (!IsLoadStore && SrcVT == MVT::i16) |
812 |
else if (!IsLoadStore && SrcVT == MVT::i16) |
| 813 |
return AArch64_AM::UXTH; |
813 |
return AArch64_AM::UXTH; |
| 814 |
else if (SrcVT == MVT::i32) |
814 |
else if (SrcVT == MVT::i32) |
| 815 |
return AArch64_AM::UXTW; |
815 |
return AArch64_AM::UXTW; |
| 816 |
assert(SrcVT != MVT::i64 && "extend from 64-bits?"); |
816 |
assert(SrcVT != MVT::i64 && "extend from 64-bits?"); |
| 817 |
|
817 |
|
| 818 |
return AArch64_AM::InvalidShiftExtend; |
818 |
return AArch64_AM::InvalidShiftExtend; |
| 819 |
} else if (N.getOpcode() == ISD::AND) { |
819 |
} else if (N.getOpcode() == ISD::AND) { |
| 820 |
ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); |
820 |
ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); |
| 821 |
if (!CSD) |
821 |
if (!CSD) |
| 822 |
return AArch64_AM::InvalidShiftExtend; |
822 |
return AArch64_AM::InvalidShiftExtend; |
| 823 |
uint64_t AndMask = CSD->getZExtValue(); |
823 |
uint64_t AndMask = CSD->getZExtValue(); |
| 824 |
|
824 |
|
| 825 |
switch (AndMask) { |
825 |
switch (AndMask) { |
| 826 |
default: |
826 |
default: |
| 827 |
return AArch64_AM::InvalidShiftExtend; |
827 |
return AArch64_AM::InvalidShiftExtend; |
| 828 |
case 0xFF: |
828 |
case 0xFF: |
| 829 |
return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; |
829 |
return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; |
| 830 |
case 0xFFFF: |
830 |
case 0xFFFF: |
| 831 |
return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; |
831 |
return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; |
| 832 |
case 0xFFFFFFFF: |
832 |
case 0xFFFFFFFF: |
| 833 |
return AArch64_AM::UXTW; |
833 |
return AArch64_AM::UXTW; |
| 834 |
} |
834 |
} |
| 835 |
} |
835 |
} |
| 836 |
|
836 |
|
| 837 |
return AArch64_AM::InvalidShiftExtend; |
837 |
return AArch64_AM::InvalidShiftExtend; |
| 838 |
} |
838 |
} |
| 839 |
|
839 |
|
| 840 |
/// Instructions that accept extend modifiers like UXTW expect the register |
840 |
/// Instructions that accept extend modifiers like UXTW expect the register |
| 841 |
/// being extended to be a GPR32, but the incoming DAG might be acting on a |
841 |
/// being extended to be a GPR32, but the incoming DAG might be acting on a |
| 842 |
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if |
842 |
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if |
| 843 |
/// this is the case. |
843 |
/// this is the case. |
| 844 |
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { |
844 |
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { |
| 845 |
if (N.getValueType() == MVT::i32) |
845 |
if (N.getValueType() == MVT::i32) |
| 846 |
return N; |
846 |
return N; |
| 847 |
|
847 |
|
| 848 |
SDLoc dl(N); |
848 |
SDLoc dl(N); |
| 849 |
return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N); |
849 |
return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N); |
| 850 |
} |
850 |
} |
| 851 |
|
851 |
|
| 852 |
// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. |
852 |
// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. |
| 853 |
template |
853 |
template |
| 854 |
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) { |
854 |
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) { |
| 855 |
if (!isa(N)) |
855 |
if (!isa(N)) |
| 856 |
return false; |
856 |
return false; |
| 857 |
|
857 |
|
| 858 |
int64_t MulImm = cast(N)->getSExtValue(); |
858 |
int64_t MulImm = cast(N)->getSExtValue(); |
| 859 |
if ((MulImm % std::abs(Scale)) == 0) { |
859 |
if ((MulImm % std::abs(Scale)) == 0) { |
| 860 |
int64_t RDVLImm = MulImm / Scale; |
860 |
int64_t RDVLImm = MulImm / Scale; |
| 861 |
if ((RDVLImm >= Low) && (RDVLImm <= High)) { |
861 |
if ((RDVLImm >= Low) && (RDVLImm <= High)) { |
| 862 |
Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32); |
862 |
Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32); |
| 863 |
return true; |
863 |
return true; |
| 864 |
} |
864 |
} |
| 865 |
} |
865 |
} |
| 866 |
|
866 |
|
| 867 |
return false; |
867 |
return false; |
| 868 |
} |
868 |
} |
| 869 |
|
869 |
|
| 870 |
/// SelectArithExtendedRegister - Select a "extended register" operand. This |
870 |
/// SelectArithExtendedRegister - Select a "extended register" operand. This |
| 871 |
/// operand folds in an extend followed by an optional left shift. |
871 |
/// operand folds in an extend followed by an optional left shift. |
| 872 |
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, |
872 |
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, |
| 873 |
SDValue &Shift) { |
873 |
SDValue &Shift) { |
| 874 |
unsigned ShiftVal = 0; |
874 |
unsigned ShiftVal = 0; |
| 875 |
AArch64_AM::ShiftExtendType Ext; |
875 |
AArch64_AM::ShiftExtendType Ext; |
| 876 |
|
876 |
|
| 877 |
if (N.getOpcode() == ISD::SHL) { |
877 |
if (N.getOpcode() == ISD::SHL) { |
| 878 |
ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); |
878 |
ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); |
| 879 |
if (!CSD) |
879 |
if (!CSD) |
| 880 |
return false; |
880 |
return false; |
| 881 |
ShiftVal = CSD->getZExtValue(); |
881 |
ShiftVal = CSD->getZExtValue(); |
| 882 |
if (ShiftVal > 4) |
882 |
if (ShiftVal > 4) |
| 883 |
return false; |
883 |
return false; |
| 884 |
|
884 |
|
| 885 |
Ext = getExtendTypeForNode(N.getOperand(0)); |
885 |
Ext = getExtendTypeForNode(N.getOperand(0)); |
| 886 |
if (Ext == AArch64_AM::InvalidShiftExtend) |
886 |
if (Ext == AArch64_AM::InvalidShiftExtend) |
| 887 |
return false; |
887 |
return false; |
| 888 |
|
888 |
|
| 889 |
Reg = N.getOperand(0).getOperand(0); |
889 |
Reg = N.getOperand(0).getOperand(0); |
| 890 |
} else { |
890 |
} else { |
| 891 |
Ext = getExtendTypeForNode(N); |
891 |
Ext = getExtendTypeForNode(N); |
| 892 |
if (Ext == AArch64_AM::InvalidShiftExtend) |
892 |
if (Ext == AArch64_AM::InvalidShiftExtend) |
| 893 |
return false; |
893 |
return false; |
| 894 |
|
894 |
|
| 895 |
Reg = N.getOperand(0); |
895 |
Reg = N.getOperand(0); |
| 896 |
|
896 |
|
| 897 |
// Don't match if free 32-bit -> 64-bit zext can be used instead. Use the |
897 |
// Don't match if free 32-bit -> 64-bit zext can be used instead. Use the |
| 898 |
// isDef32 as a heuristic for when the operand is likely to be a 32bit def. |
898 |
// isDef32 as a heuristic for when the operand is likely to be a 32bit def. |
| 899 |
auto isDef32 = [](SDValue N) { |
899 |
auto isDef32 = [](SDValue N) { |
| 900 |
unsigned Opc = N.getOpcode(); |
900 |
unsigned Opc = N.getOpcode(); |
| 901 |
return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG && |
901 |
return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG && |
| 902 |
Opc != ISD::CopyFromReg && Opc != ISD::AssertSext && |
902 |
Opc != ISD::CopyFromReg && Opc != ISD::AssertSext && |
| 903 |
Opc != ISD::AssertZext && Opc != ISD::AssertAlign && |
903 |
Opc != ISD::AssertZext && Opc != ISD::AssertAlign && |
| 904 |
Opc != ISD::FREEZE; |
904 |
Opc != ISD::FREEZE; |
| 905 |
}; |
905 |
}; |
| 906 |
if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 && |
906 |
if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 && |
| 907 |
isDef32(Reg)) |
907 |
isDef32(Reg)) |
| 908 |
return false; |
908 |
return false; |
| 909 |
} |
909 |
} |
| 910 |
|
910 |
|
| 911 |
// AArch64 mandates that the RHS of the operation must use the smallest |
911 |
// AArch64 mandates that the RHS of the operation must use the smallest |
| 912 |
// register class that could contain the size being extended from. Thus, |
912 |
// register class that could contain the size being extended from. Thus, |
| 913 |
// if we're folding a (sext i8), we need the RHS to be a GPR32, even though |
913 |
// if we're folding a (sext i8), we need the RHS to be a GPR32, even though |
| 914 |
// there might not be an actual 32-bit value in the program. We can |
914 |
// there might not be an actual 32-bit value in the program. We can |
| 915 |
// (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. |
915 |
// (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. |
| 916 |
assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); |
916 |
assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); |
| 917 |
Reg = narrowIfNeeded(CurDAG, Reg); |
917 |
Reg = narrowIfNeeded(CurDAG, Reg); |
| 918 |
Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), |
918 |
Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), |
| 919 |
MVT::i32); |
919 |
MVT::i32); |
| 920 |
return isWorthFolding(N); |
920 |
return isWorthFolding(N); |
| 921 |
} |
921 |
} |
| 922 |
|
922 |
|
| 923 |
/// SelectArithUXTXRegister - Select a "UXTX register" operand. This |
923 |
/// SelectArithUXTXRegister - Select a "UXTX register" operand. This |
| 924 |
/// operand is refered by the instructions have SP operand |
924 |
/// operand is refered by the instructions have SP operand |
| 925 |
bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg, |
925 |
bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg, |
| 926 |
SDValue &Shift) { |
926 |
SDValue &Shift) { |
| 927 |
unsigned ShiftVal = 0; |
927 |
unsigned ShiftVal = 0; |
| 928 |
AArch64_AM::ShiftExtendType Ext; |
928 |
AArch64_AM::ShiftExtendType Ext; |
| 929 |
|
929 |
|
| 930 |
if (N.getOpcode() != ISD::SHL) |
930 |
if (N.getOpcode() != ISD::SHL) |
| 931 |
return false; |
931 |
return false; |
| 932 |
|
932 |
|
| 933 |
ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); |
933 |
ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); |
| 934 |
if (!CSD) |
934 |
if (!CSD) |
| 935 |
return false; |
935 |
return false; |
| 936 |
ShiftVal = CSD->getZExtValue(); |
936 |
ShiftVal = CSD->getZExtValue(); |
| 937 |
if (ShiftVal > 4) |
937 |
if (ShiftVal > 4) |
| 938 |
return false; |
938 |
return false; |
| 939 |
|
939 |
|
| 940 |
Ext = AArch64_AM::UXTX; |
940 |
Ext = AArch64_AM::UXTX; |
| 941 |
Reg = N.getOperand(0); |
941 |
Reg = N.getOperand(0); |
| 942 |
Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), |
942 |
Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), |
| 943 |
MVT::i32); |
943 |
MVT::i32); |
| 944 |
return isWorthFolding(N); |
944 |
return isWorthFolding(N); |
| 945 |
} |
945 |
} |
| 946 |
|
946 |
|
| 947 |
/// If there's a use of this ADDlow that's not itself a load/store then we'll |
947 |
/// If there's a use of this ADDlow that's not itself a load/store then we'll |
| 948 |
/// need to create a real ADD instruction from it anyway and there's no point in |
948 |
/// need to create a real ADD instruction from it anyway and there's no point in |
| 949 |
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's |
949 |
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's |
| 950 |
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding |
950 |
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding |
| 951 |
/// leads to duplicated ADRP instructions. |
951 |
/// leads to duplicated ADRP instructions. |
| 952 |
static bool isWorthFoldingADDlow(SDValue N) { |
952 |
static bool isWorthFoldingADDlow(SDValue N) { |
| 953 |
for (auto *Use : N->uses()) { |
953 |
for (auto *Use : N->uses()) { |
| 954 |
if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && |
954 |
if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && |
| 955 |
Use->getOpcode() != ISD::ATOMIC_LOAD && |
955 |
Use->getOpcode() != ISD::ATOMIC_LOAD && |
| 956 |
Use->getOpcode() != ISD::ATOMIC_STORE) |
956 |
Use->getOpcode() != ISD::ATOMIC_STORE) |
| 957 |
return false; |
957 |
return false; |
| 958 |
|
958 |
|
| 959 |
// ldar and stlr have much more restrictive addressing modes (just a |
959 |
// ldar and stlr have much more restrictive addressing modes (just a |
| 960 |
// register). |
960 |
// register). |
| 961 |
if (isStrongerThanMonotonic(cast(Use)->getSuccessOrdering())) |
961 |
if (isStrongerThanMonotonic(cast(Use)->getSuccessOrdering())) |
| 962 |
return false; |
962 |
return false; |
| 963 |
} |
963 |
} |
| 964 |
|
964 |
|
| 965 |
return true; |
965 |
return true; |
| 966 |
} |
966 |
} |
| 967 |
|
967 |
|
| 968 |
/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit |
968 |
/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit |
| 969 |
/// immediate" address. The "Size" argument is the size in bytes of the memory |
969 |
/// immediate" address. The "Size" argument is the size in bytes of the memory |
| 970 |
/// reference, which determines the scale. |
970 |
/// reference, which determines the scale. |
| 971 |
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, |
971 |
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, |
| 972 |
unsigned BW, unsigned Size, |
972 |
unsigned BW, unsigned Size, |
| 973 |
SDValue &Base, |
973 |
SDValue &Base, |
| 974 |
SDValue &OffImm) { |
974 |
SDValue &OffImm) { |
| 975 |
SDLoc dl(N); |
975 |
SDLoc dl(N); |
| 976 |
const DataLayout &DL = CurDAG->getDataLayout(); |
976 |
const DataLayout &DL = CurDAG->getDataLayout(); |
| 977 |
const TargetLowering *TLI = getTargetLowering(); |
977 |
const TargetLowering *TLI = getTargetLowering(); |
| 978 |
if (N.getOpcode() == ISD::FrameIndex) { |
978 |
if (N.getOpcode() == ISD::FrameIndex) { |
| 979 |
int FI = cast(N)->getIndex(); |
979 |
int FI = cast(N)->getIndex(); |
| 980 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
980 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| 981 |
OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
981 |
OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
| 982 |
return true; |
982 |
return true; |
| 983 |
} |
983 |
} |
| 984 |
|
984 |
|
| 985 |
// As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed |
985 |
// As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed |
| 986 |
// selected here doesn't support labels/immediates, only base+offset. |
986 |
// selected here doesn't support labels/immediates, only base+offset. |
| 987 |
if (CurDAG->isBaseWithConstantOffset(N)) { |
987 |
if (CurDAG->isBaseWithConstantOffset(N)) { |
| 988 |
if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { |
988 |
if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { |
| 989 |
if (IsSignedImm) { |
989 |
if (IsSignedImm) { |
| 990 |
int64_t RHSC = RHS->getSExtValue(); |
990 |
int64_t RHSC = RHS->getSExtValue(); |
| 991 |
unsigned Scale = Log2_32(Size); |
991 |
unsigned Scale = Log2_32(Size); |
| 992 |
int64_t Range = 0x1LL << (BW - 1); |
992 |
int64_t Range = 0x1LL << (BW - 1); |
| 993 |
|
993 |
|
| 994 |
if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && |
994 |
if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && |
| 995 |
RHSC < (Range << Scale)) { |
995 |
RHSC < (Range << Scale)) { |
| 996 |
Base = N.getOperand(0); |
996 |
Base = N.getOperand(0); |
| 997 |
if (Base.getOpcode() == ISD::FrameIndex) { |
997 |
if (Base.getOpcode() == ISD::FrameIndex) { |
| 998 |
int FI = cast(Base)->getIndex(); |
998 |
int FI = cast(Base)->getIndex(); |
| 999 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
999 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| 1000 |
} |
1000 |
} |
| 1001 |
OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); |
1001 |
OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); |
| 1002 |
return true; |
1002 |
return true; |
| 1003 |
} |
1003 |
} |
| 1004 |
} else { |
1004 |
} else { |
| 1005 |
// unsigned Immediate |
1005 |
// unsigned Immediate |
| 1006 |
uint64_t RHSC = RHS->getZExtValue(); |
1006 |
uint64_t RHSC = RHS->getZExtValue(); |
| 1007 |
unsigned Scale = Log2_32(Size); |
1007 |
unsigned Scale = Log2_32(Size); |
| 1008 |
uint64_t Range = 0x1ULL << BW; |
1008 |
uint64_t Range = 0x1ULL << BW; |
| 1009 |
|
1009 |
|
| 1010 |
if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { |
1010 |
if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { |
| 1011 |
Base = N.getOperand(0); |
1011 |
Base = N.getOperand(0); |
| 1012 |
if (Base.getOpcode() == ISD::FrameIndex) { |
1012 |
if (Base.getOpcode() == ISD::FrameIndex) { |
| 1013 |
int FI = cast(Base)->getIndex(); |
1013 |
int FI = cast(Base)->getIndex(); |
| 1014 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
1014 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| 1015 |
} |
1015 |
} |
| 1016 |
OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); |
1016 |
OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); |
| 1017 |
return true; |
1017 |
return true; |
| 1018 |
} |
1018 |
} |
| 1019 |
} |
1019 |
} |
| 1020 |
} |
1020 |
} |
| 1021 |
} |
1021 |
} |
| 1022 |
// Base only. The address will be materialized into a register before |
1022 |
// Base only. The address will be materialized into a register before |
| 1023 |
// the memory is accessed. |
1023 |
// the memory is accessed. |
| 1024 |
// add x0, Xbase, #offset |
1024 |
// add x0, Xbase, #offset |
| 1025 |
// stp x1, x2, [x0] |
1025 |
// stp x1, x2, [x0] |
| 1026 |
Base = N; |
1026 |
Base = N; |
| 1027 |
OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
1027 |
OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
| 1028 |
return true; |
1028 |
return true; |
| 1029 |
} |
1029 |
} |
| 1030 |
|
1030 |
|
| 1031 |
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit |
1031 |
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit |
| 1032 |
/// immediate" address. The "Size" argument is the size in bytes of the memory |
1032 |
/// immediate" address. The "Size" argument is the size in bytes of the memory |
| 1033 |
/// reference, which determines the scale. |
1033 |
/// reference, which determines the scale. |
| 1034 |
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, |
1034 |
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, |
| 1035 |
SDValue &Base, SDValue &OffImm) { |
1035 |
SDValue &Base, SDValue &OffImm) { |
| 1036 |
SDLoc dl(N); |
1036 |
SDLoc dl(N); |
| 1037 |
const DataLayout &DL = CurDAG->getDataLayout(); |
1037 |
const DataLayout &DL = CurDAG->getDataLayout(); |
| 1038 |
const TargetLowering *TLI = getTargetLowering(); |
1038 |
const TargetLowering *TLI = getTargetLowering(); |
| 1039 |
if (N.getOpcode() == ISD::FrameIndex) { |
1039 |
if (N.getOpcode() == ISD::FrameIndex) { |
| 1040 |
int FI = cast(N)->getIndex(); |
1040 |
int FI = cast(N)->getIndex(); |
| 1041 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
1041 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| 1042 |
OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
1042 |
OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
| 1043 |
return true; |
1043 |
return true; |
| 1044 |
} |
1044 |
} |
| 1045 |
|
1045 |
|
| 1046 |
if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { |
1046 |
if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { |
| 1047 |
GlobalAddressSDNode *GAN = |
1047 |
GlobalAddressSDNode *GAN = |
| 1048 |
dyn_cast(N.getOperand(1).getNode()); |
1048 |
dyn_cast(N.getOperand(1).getNode()); |
| 1049 |
Base = N.getOperand(0); |
1049 |
Base = N.getOperand(0); |
| 1050 |
OffImm = N.getOperand(1); |
1050 |
OffImm = N.getOperand(1); |
| 1051 |
if (!GAN) |
1051 |
if (!GAN) |
| 1052 |
return true; |
1052 |
return true; |
| 1053 |
|
1053 |
|
| 1054 |
if (GAN->getOffset() % Size == 0 && |
1054 |
if (GAN->getOffset() % Size == 0 && |
| 1055 |
GAN->getGlobal()->getPointerAlignment(DL) >= Size) |
1055 |
GAN->getGlobal()->getPointerAlignment(DL) >= Size) |
| 1056 |
return true; |
1056 |
return true; |
| 1057 |
} |
1057 |
} |
| 1058 |
|
1058 |
|
| 1059 |
if (CurDAG->isBaseWithConstantOffset(N)) { |
1059 |
if (CurDAG->isBaseWithConstantOffset(N)) { |
| 1060 |
if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { |
1060 |
if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { |
| 1061 |
int64_t RHSC = (int64_t)RHS->getZExtValue(); |
1061 |
int64_t RHSC = (int64_t)RHS->getZExtValue(); |
| 1062 |
unsigned Scale = Log2_32(Size); |
1062 |
unsigned Scale = Log2_32(Size); |
| 1063 |
if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { |
1063 |
if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { |
| 1064 |
Base = N.getOperand(0); |
1064 |
Base = N.getOperand(0); |
| 1065 |
if (Base.getOpcode() == ISD::FrameIndex) { |
1065 |
if (Base.getOpcode() == ISD::FrameIndex) { |
| 1066 |
int FI = cast(Base)->getIndex(); |
1066 |
int FI = cast(Base)->getIndex(); |
| 1067 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
1067 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| 1068 |
} |
1068 |
} |
| 1069 |
OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); |
1069 |
OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); |
| 1070 |
return true; |
1070 |
return true; |
| 1071 |
} |
1071 |
} |
| 1072 |
} |
1072 |
} |
| 1073 |
} |
1073 |
} |
| 1074 |
|
1074 |
|
| 1075 |
// Before falling back to our general case, check if the unscaled |
1075 |
// Before falling back to our general case, check if the unscaled |
| 1076 |
// instructions can handle this. If so, that's preferable. |
1076 |
// instructions can handle this. If so, that's preferable. |
| 1077 |
if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) |
1077 |
if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) |
| 1078 |
return false; |
1078 |
return false; |
| 1079 |
|
1079 |
|
| 1080 |
// Base only. The address will be materialized into a register before |
1080 |
// Base only. The address will be materialized into a register before |
| 1081 |
// the memory is accessed. |
1081 |
// the memory is accessed. |
| 1082 |
// add x0, Xbase, #offset |
1082 |
// add x0, Xbase, #offset |
| 1083 |
// ldr x0, [x0] |
1083 |
// ldr x0, [x0] |
| 1084 |
Base = N; |
1084 |
Base = N; |
| 1085 |
OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
1085 |
OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
| 1086 |
return true; |
1086 |
return true; |
| 1087 |
} |
1087 |
} |
| 1088 |
|
1088 |
|
| 1089 |
/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit |
1089 |
/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit |
| 1090 |
/// immediate" address. This should only match when there is an offset that |
1090 |
/// immediate" address. This should only match when there is an offset that |
| 1091 |
/// is not valid for a scaled immediate addressing mode. The "Size" argument |
1091 |
/// is not valid for a scaled immediate addressing mode. The "Size" argument |
| 1092 |
/// is the size in bytes of the memory reference, which is needed here to know |
1092 |
/// is the size in bytes of the memory reference, which is needed here to know |
| 1093 |
/// what is valid for a scaled immediate. |
1093 |
/// what is valid for a scaled immediate. |
| 1094 |
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, |
1094 |
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, |
| 1095 |
SDValue &Base, |
1095 |
SDValue &Base, |
| 1096 |
SDValue &OffImm) { |
1096 |
SDValue &OffImm) { |
| 1097 |
if (!CurDAG->isBaseWithConstantOffset(N)) |
1097 |
if (!CurDAG->isBaseWithConstantOffset(N)) |
| 1098 |
return false; |
1098 |
return false; |
| 1099 |
if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { |
1099 |
if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { |
| 1100 |
int64_t RHSC = RHS->getSExtValue(); |
1100 |
int64_t RHSC = RHS->getSExtValue(); |
| 1101 |
// If the offset is valid as a scaled immediate, don't match here. |
1101 |
// If the offset is valid as a scaled immediate, don't match here. |
| 1102 |
if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && |
1102 |
if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && |
| 1103 |
RHSC < (0x1000 << Log2_32(Size))) |
1103 |
RHSC < (0x1000 << Log2_32(Size))) |
| 1104 |
return false; |
1104 |
return false; |
| 1105 |
if (RHSC >= -256 && RHSC < 256) { |
1105 |
if (RHSC >= -256 && RHSC < 256) { |
| 1106 |
Base = N.getOperand(0); |
1106 |
Base = N.getOperand(0); |
| 1107 |
if (Base.getOpcode() == ISD::FrameIndex) { |
1107 |
if (Base.getOpcode() == ISD::FrameIndex) { |
| 1108 |
int FI = cast(Base)->getIndex(); |
1108 |
int FI = cast(Base)->getIndex(); |
| 1109 |
const TargetLowering *TLI = getTargetLowering(); |
1109 |
const TargetLowering *TLI = getTargetLowering(); |
| 1110 |
Base = CurDAG->getTargetFrameIndex( |
1110 |
Base = CurDAG->getTargetFrameIndex( |
| 1111 |
FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
1111 |
FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| 1112 |
} |
1112 |
} |
| 1113 |
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); |
1113 |
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); |
| 1114 |
return true; |
1114 |
return true; |
| 1115 |
} |
1115 |
} |
| 1116 |
} |
1116 |
} |
| 1117 |
return false; |
1117 |
return false; |
| 1118 |
} |
1118 |
} |
| 1119 |
|
1119 |
|
| 1120 |
static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { |
1120 |
static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { |
| 1121 |
SDLoc dl(N); |
1121 |
SDLoc dl(N); |
| 1122 |
SDValue ImpDef = SDValue( |
1122 |
SDValue ImpDef = SDValue( |
| 1123 |
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); |
1123 |
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); |
| 1124 |
return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef, |
1124 |
return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef, |
| 1125 |
N); |
1125 |
N); |
| 1126 |
} |
1126 |
} |
| 1127 |
|
1127 |
|
| 1128 |
/// Check if the given SHL node (\p N), can be used to form an |
1128 |
/// Check if the given SHL node (\p N), can be used to form an |
| 1129 |
/// extended register for an addressing mode. |
1129 |
/// extended register for an addressing mode. |
| 1130 |
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, |
1130 |
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, |
| 1131 |
bool WantExtend, SDValue &Offset, |
1131 |
bool WantExtend, SDValue &Offset, |
| 1132 |
SDValue &SignExtend) { |
1132 |
SDValue &SignExtend) { |
| 1133 |
assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); |
1133 |
assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); |
| 1134 |
ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); |
1134 |
ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); |
| 1135 |
if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) |
1135 |
if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) |
| 1136 |
return false; |
1136 |
return false; |
| 1137 |
|
1137 |
|
| 1138 |
SDLoc dl(N); |
1138 |
SDLoc dl(N); |
| 1139 |
if (WantExtend) { |
1139 |
if (WantExtend) { |
| 1140 |
AArch64_AM::ShiftExtendType Ext = |
1140 |
AArch64_AM::ShiftExtendType Ext = |
| 1141 |
getExtendTypeForNode(N.getOperand(0), true); |
1141 |
getExtendTypeForNode(N.getOperand(0), true); |
| 1142 |
if (Ext == AArch64_AM::InvalidShiftExtend) |
1142 |
if (Ext == AArch64_AM::InvalidShiftExtend) |
| 1143 |
return false; |
1143 |
return false; |
| 1144 |
|
1144 |
|
| 1145 |
Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); |
1145 |
Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); |
| 1146 |
SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, |
1146 |
SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, |
| 1147 |
MVT::i32); |
1147 |
MVT::i32); |
| 1148 |
} else { |
1148 |
} else { |
| 1149 |
Offset = N.getOperand(0); |
1149 |
Offset = N.getOperand(0); |
| 1150 |
SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); |
1150 |
SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); |
| 1151 |
} |
1151 |
} |
| 1152 |
|
1152 |
|
| 1153 |
unsigned LegalShiftVal = Log2_32(Size); |
1153 |
unsigned LegalShiftVal = Log2_32(Size); |
| 1154 |
unsigned ShiftVal = CSD->getZExtValue(); |
1154 |
unsigned ShiftVal = CSD->getZExtValue(); |
| 1155 |
|
1155 |
|
| 1156 |
if (ShiftVal != 0 && ShiftVal != LegalShiftVal) |
1156 |
if (ShiftVal != 0 && ShiftVal != LegalShiftVal) |
| 1157 |
return false; |
1157 |
return false; |
| 1158 |
|
1158 |
|
| 1159 |
return isWorthFolding(N); |
1159 |
return isWorthFolding(N); |
| 1160 |
} |
1160 |
} |
| 1161 |
|
1161 |
|
| 1162 |
bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, |
1162 |
bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, |
| 1163 |
SDValue &Base, SDValue &Offset, |
1163 |
SDValue &Base, SDValue &Offset, |
| 1164 |
SDValue &SignExtend, |
1164 |
SDValue &SignExtend, |
| 1165 |
SDValue &DoShift) { |
1165 |
SDValue &DoShift) { |
| 1166 |
if (N.getOpcode() != ISD::ADD) |
1166 |
if (N.getOpcode() != ISD::ADD) |
| 1167 |
return false; |
1167 |
return false; |
| 1168 |
SDValue LHS = N.getOperand(0); |
1168 |
SDValue LHS = N.getOperand(0); |
| 1169 |
SDValue RHS = N.getOperand(1); |
1169 |
SDValue RHS = N.getOperand(1); |
| 1170 |
SDLoc dl(N); |
1170 |
SDLoc dl(N); |
| 1171 |
|
1171 |
|
| 1172 |
// We don't want to match immediate adds here, because they are better lowered |
1172 |
// We don't want to match immediate adds here, because they are better lowered |
| 1173 |
// to the register-immediate addressing modes. |
1173 |
// to the register-immediate addressing modes. |
| 1174 |
if (isa(LHS) || isa(RHS)) |
1174 |
if (isa(LHS) || isa(RHS)) |
| 1175 |
return false; |
1175 |
return false; |
| 1176 |
|
1176 |
|
| 1177 |
// Check if this particular node is reused in any non-memory related |
1177 |
// Check if this particular node is reused in any non-memory related |
| 1178 |
// operation. If yes, do not try to fold this node into the address |
1178 |
// operation. If yes, do not try to fold this node into the address |
| 1179 |
// computation, since the computation will be kept. |
1179 |
// computation, since the computation will be kept. |
| 1180 |
const SDNode *Node = N.getNode(); |
1180 |
const SDNode *Node = N.getNode(); |
| 1181 |
for (SDNode *UI : Node->uses()) { |
1181 |
for (SDNode *UI : Node->uses()) { |
| 1182 |
if (!isa(*UI)) |
1182 |
if (!isa(*UI)) |
| 1183 |
return false; |
1183 |
return false; |
| 1184 |
} |
1184 |
} |
| 1185 |
|
1185 |
|
| 1186 |
// Remember if it is worth folding N when it produces extended register. |
1186 |
// Remember if it is worth folding N when it produces extended register. |
| 1187 |
bool IsExtendedRegisterWorthFolding = isWorthFolding(N); |
1187 |
bool IsExtendedRegisterWorthFolding = isWorthFolding(N); |
| 1188 |
|
1188 |
|
| 1189 |
// Try to match a shifted extend on the RHS. |
1189 |
// Try to match a shifted extend on the RHS. |
| 1190 |
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && |
1190 |
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && |
| 1191 |
SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { |
1191 |
SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { |
| 1192 |
Base = LHS; |
1192 |
Base = LHS; |
| 1193 |
DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); |
1193 |
DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); |
| 1194 |
return true; |
1194 |
return true; |
| 1195 |
} |
1195 |
} |
| 1196 |
|
1196 |
|
| 1197 |
// Try to match a shifted extend on the LHS. |
1197 |
// Try to match a shifted extend on the LHS. |
| 1198 |
if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && |
1198 |
if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && |
| 1199 |
SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { |
1199 |
SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { |
| 1200 |
Base = RHS; |
1200 |
Base = RHS; |
| 1201 |
DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); |
1201 |
DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); |
| 1202 |
return true; |
1202 |
return true; |
| 1203 |
} |
1203 |
} |
| 1204 |
|
1204 |
|
| 1205 |
// There was no shift, whatever else we find. |
1205 |
// There was no shift, whatever else we find. |
| 1206 |
DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); |
1206 |
DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); |
| 1207 |
|
1207 |
|
| 1208 |
AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; |
1208 |
AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; |
| 1209 |
// Try to match an unshifted extend on the LHS. |
1209 |
// Try to match an unshifted extend on the LHS. |
| 1210 |
if (IsExtendedRegisterWorthFolding && |
1210 |
if (IsExtendedRegisterWorthFolding && |
| 1211 |
(Ext = getExtendTypeForNode(LHS, true)) != |
1211 |
(Ext = getExtendTypeForNode(LHS, true)) != |
| 1212 |
AArch64_AM::InvalidShiftExtend) { |
1212 |
AArch64_AM::InvalidShiftExtend) { |
| 1213 |
Base = RHS; |
1213 |
Base = RHS; |
| 1214 |
Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); |
1214 |
Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); |
| 1215 |
SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, |
1215 |
SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, |
| 1216 |
MVT::i32); |
1216 |
MVT::i32); |
| 1217 |
if (isWorthFolding(LHS)) |
1217 |
if (isWorthFolding(LHS)) |
| 1218 |
return true; |
1218 |
return true; |
| 1219 |
} |
1219 |
} |
| 1220 |
|
1220 |
|
| 1221 |
// Try to match an unshifted extend on the RHS. |
1221 |
// Try to match an unshifted extend on the RHS. |
| 1222 |
if (IsExtendedRegisterWorthFolding && |
1222 |
if (IsExtendedRegisterWorthFolding && |
| 1223 |
(Ext = getExtendTypeForNode(RHS, true)) != |
1223 |
(Ext = getExtendTypeForNode(RHS, true)) != |
| 1224 |
AArch64_AM::InvalidShiftExtend) { |
1224 |
AArch64_AM::InvalidShiftExtend) { |
| 1225 |
Base = LHS; |
1225 |
Base = LHS; |
| 1226 |
Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); |
1226 |
Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); |
| 1227 |
SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, |
1227 |
SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, |
| 1228 |
MVT::i32); |
1228 |
MVT::i32); |
| 1229 |
if (isWorthFolding(RHS)) |
1229 |
if (isWorthFolding(RHS)) |
| 1230 |
return true; |
1230 |
return true; |
| 1231 |
} |
1231 |
} |
| 1232 |
|
1232 |
|
| 1233 |
return false; |
1233 |
return false; |
| 1234 |
} |
1234 |
} |
| 1235 |
|
1235 |
|
| 1236 |
// Check if the given immediate is preferred by ADD. If an immediate can be |
1236 |
// Check if the given immediate is preferred by ADD. If an immediate can be |
| 1237 |
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be |
1237 |
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be |
| 1238 |
// encoded by one MOVZ, return true. |
1238 |
// encoded by one MOVZ, return true. |
| 1239 |
static bool isPreferredADD(int64_t ImmOff) { |
1239 |
static bool isPreferredADD(int64_t ImmOff) { |
| 1240 |
// Constant in [0x0, 0xfff] can be encoded in ADD. |
1240 |
// Constant in [0x0, 0xfff] can be encoded in ADD. |
| 1241 |
if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) |
1241 |
if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) |
| 1242 |
return true; |
1242 |
return true; |
| 1243 |
// Check if it can be encoded in an "ADD LSL #12". |
1243 |
// Check if it can be encoded in an "ADD LSL #12". |
| 1244 |
if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) |
1244 |
if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) |
| 1245 |
// As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. |
1245 |
// As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. |
| 1246 |
return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && |
1246 |
return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && |
| 1247 |
(ImmOff & 0xffffffffffff0fffLL) != 0x0LL; |
1247 |
(ImmOff & 0xffffffffffff0fffLL) != 0x0LL; |
| 1248 |
return false; |
1248 |
return false; |
| 1249 |
} |
1249 |
} |
| 1250 |
|
1250 |
|
| 1251 |
bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, |
1251 |
bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, |
| 1252 |
SDValue &Base, SDValue &Offset, |
1252 |
SDValue &Base, SDValue &Offset, |
| 1253 |
SDValue &SignExtend, |
1253 |
SDValue &SignExtend, |
| 1254 |
SDValue &DoShift) { |
1254 |
SDValue &DoShift) { |
| 1255 |
if (N.getOpcode() != ISD::ADD) |
1255 |
if (N.getOpcode() != ISD::ADD) |
| 1256 |
return false; |
1256 |
return false; |
| 1257 |
SDValue LHS = N.getOperand(0); |
1257 |
SDValue LHS = N.getOperand(0); |
| 1258 |
SDValue RHS = N.getOperand(1); |
1258 |
SDValue RHS = N.getOperand(1); |
| 1259 |
SDLoc DL(N); |
1259 |
SDLoc DL(N); |
| 1260 |
|
1260 |
|
| 1261 |
// Check if this particular node is reused in any non-memory related |
1261 |
// Check if this particular node is reused in any non-memory related |
| 1262 |
// operation. If yes, do not try to fold this node into the address |
1262 |
// operation. If yes, do not try to fold this node into the address |
| 1263 |
// computation, since the computation will be kept. |
1263 |
// computation, since the computation will be kept. |
| 1264 |
const SDNode *Node = N.getNode(); |
1264 |
const SDNode *Node = N.getNode(); |
| 1265 |
for (SDNode *UI : Node->uses()) { |
1265 |
for (SDNode *UI : Node->uses()) { |
| 1266 |
if (!isa(*UI)) |
1266 |
if (!isa(*UI)) |
| 1267 |
return false; |
1267 |
return false; |
| 1268 |
} |
1268 |
} |
| 1269 |
|
1269 |
|
| 1270 |
// Watch out if RHS is a wide immediate, it can not be selected into |
1270 |
// Watch out if RHS is a wide immediate, it can not be selected into |
| 1271 |
// [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into |
1271 |
// [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into |
| 1272 |
// ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate |
1272 |
// ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate |
| 1273 |
// instructions like: |
1273 |
// instructions like: |
| 1274 |
// MOV X0, WideImmediate |
1274 |
// MOV X0, WideImmediate |
| 1275 |
// ADD X1, BaseReg, X0 |
1275 |
// ADD X1, BaseReg, X0 |
| 1276 |
// LDR X2, [X1, 0] |
1276 |
// LDR X2, [X1, 0] |
| 1277 |
// For such situation, using [BaseReg, XReg] addressing mode can save one |
1277 |
// For such situation, using [BaseReg, XReg] addressing mode can save one |
| 1278 |
// ADD/SUB: |
1278 |
// ADD/SUB: |
| 1279 |
// MOV X0, WideImmediate |
1279 |
// MOV X0, WideImmediate |
| 1280 |
// LDR X2, [BaseReg, X0] |
1280 |
// LDR X2, [BaseReg, X0] |
| 1281 |
if (isa(RHS)) { |
1281 |
if (isa(RHS)) { |
| 1282 |
int64_t ImmOff = (int64_t)cast(RHS)->getZExtValue(); |
1282 |
int64_t ImmOff = (int64_t)cast(RHS)->getZExtValue(); |
| 1283 |
unsigned Scale = Log2_32(Size); |
1283 |
unsigned Scale = Log2_32(Size); |
| 1284 |
// Skip the immediate can be selected by load/store addressing mode. |
1284 |
// Skip the immediate can be selected by load/store addressing mode. |
| 1285 |
// Also skip the immediate can be encoded by a single ADD (SUB is also |
1285 |
// Also skip the immediate can be encoded by a single ADD (SUB is also |
| 1286 |
// checked by using -ImmOff). |
1286 |
// checked by using -ImmOff). |
| 1287 |
if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || |
1287 |
if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || |
| 1288 |
isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) |
1288 |
isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) |
| 1289 |
return false; |
1289 |
return false; |
| 1290 |
|
1290 |
|
| 1291 |
SDValue Ops[] = { RHS }; |
1291 |
SDValue Ops[] = { RHS }; |
| 1292 |
SDNode *MOVI = |
1292 |
SDNode *MOVI = |
| 1293 |
CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); |
1293 |
CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); |
| 1294 |
SDValue MOVIV = SDValue(MOVI, 0); |
1294 |
SDValue MOVIV = SDValue(MOVI, 0); |
| 1295 |
// This ADD of two X register will be selected into [Reg+Reg] mode. |
1295 |
// This ADD of two X register will be selected into [Reg+Reg] mode. |
| 1296 |
N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); |
1296 |
N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); |
| 1297 |
} |
1297 |
} |
| 1298 |
|
1298 |
|
| 1299 |
// Remember if it is worth folding N when it produces extended register. |
1299 |
// Remember if it is worth folding N when it produces extended register. |
| 1300 |
bool IsExtendedRegisterWorthFolding = isWorthFolding(N); |
1300 |
bool IsExtendedRegisterWorthFolding = isWorthFolding(N); |
| 1301 |
|
1301 |
|
| 1302 |
// Try to match a shifted extend on the RHS. |
1302 |
// Try to match a shifted extend on the RHS. |
| 1303 |
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && |
1303 |
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && |
| 1304 |
SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { |
1304 |
SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { |
| 1305 |
Base = LHS; |
1305 |
Base = LHS; |
| 1306 |
DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); |
1306 |
DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); |
| 1307 |
return true; |
1307 |
return true; |
| 1308 |
} |
1308 |
} |
| 1309 |
|
1309 |
|
| 1310 |
// Try to match a shifted extend on the LHS. |
1310 |
// Try to match a shifted extend on the LHS. |
| 1311 |
if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && |
1311 |
if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && |
| 1312 |
SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { |
1312 |
SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { |
| 1313 |
Base = RHS; |
1313 |
Base = RHS; |
| 1314 |
DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); |
1314 |
DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); |
| 1315 |
return true; |
1315 |
return true; |
| 1316 |
} |
1316 |
} |
| 1317 |
|
1317 |
|
| 1318 |
// Match any non-shifted, non-extend, non-immediate add expression. |
1318 |
// Match any non-shifted, non-extend, non-immediate add expression. |
| 1319 |
Base = LHS; |
1319 |
Base = LHS; |
| 1320 |
Offset = RHS; |
1320 |
Offset = RHS; |
| 1321 |
SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); |
1321 |
SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); |
| 1322 |
DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); |
1322 |
DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); |
| 1323 |
// Reg1 + Reg2 is free: no check needed. |
1323 |
// Reg1 + Reg2 is free: no check needed. |
| 1324 |
return true; |
1324 |
return true; |
| 1325 |
} |
1325 |
} |
| 1326 |
|
1326 |
|
| 1327 |
SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef Regs) { |
1327 |
SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef Regs) { |
| 1328 |
static const unsigned RegClassIDs[] = { |
1328 |
static const unsigned RegClassIDs[] = { |
| 1329 |
AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; |
1329 |
AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; |
| 1330 |
static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, |
1330 |
static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, |
| 1331 |
AArch64::dsub2, AArch64::dsub3}; |
1331 |
AArch64::dsub2, AArch64::dsub3}; |
| 1332 |
|
1332 |
|
| 1333 |
return createTuple(Regs, RegClassIDs, SubRegs); |
1333 |
return createTuple(Regs, RegClassIDs, SubRegs); |
| 1334 |
} |
1334 |
} |
| 1335 |
|
1335 |
|
| 1336 |
SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef Regs) { |
1336 |
SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef Regs) { |
| 1337 |
static const unsigned RegClassIDs[] = { |
1337 |
static const unsigned RegClassIDs[] = { |
| 1338 |
AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; |
1338 |
AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; |
| 1339 |
static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, |
1339 |
static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, |
| 1340 |
AArch64::qsub2, AArch64::qsub3}; |
1340 |
AArch64::qsub2, AArch64::qsub3}; |
| 1341 |
|
1341 |
|
| 1342 |
return createTuple(Regs, RegClassIDs, SubRegs); |
1342 |
return createTuple(Regs, RegClassIDs, SubRegs); |
| 1343 |
} |
1343 |
} |
| 1344 |
|
1344 |
|
| 1345 |
SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef Regs) { |
1345 |
SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef Regs) { |
| 1346 |
static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID, |
1346 |
static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID, |
| 1347 |
AArch64::ZPR3RegClassID, |
1347 |
AArch64::ZPR3RegClassID, |
| 1348 |
AArch64::ZPR4RegClassID}; |
1348 |
AArch64::ZPR4RegClassID}; |
| 1349 |
static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, |
1349 |
static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, |
| 1350 |
AArch64::zsub2, AArch64::zsub3}; |
1350 |
AArch64::zsub2, AArch64::zsub3}; |
| 1351 |
|
1351 |
|
| 1352 |
return createTuple(Regs, RegClassIDs, SubRegs); |
1352 |
return createTuple(Regs, RegClassIDs, SubRegs); |
| 1353 |
} |
1353 |
} |
| 1354 |
|
1354 |
|
| 1355 |
SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef Regs) { |
1355 |
SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef Regs) { |
| 1356 |
assert(Regs.size() == 2 || Regs.size() == 4); |
1356 |
assert(Regs.size() == 2 || Regs.size() == 4); |
| 1357 |
|
1357 |
|
| 1358 |
// The createTuple interface requires 3 RegClassIDs for each possible |
1358 |
// The createTuple interface requires 3 RegClassIDs for each possible |
| 1359 |
// tuple type even though we only have them for ZPR2 and ZPR4. |
1359 |
// tuple type even though we only have them for ZPR2 and ZPR4. |
| 1360 |
static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0, |
1360 |
static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0, |
| 1361 |
AArch64::ZPR4Mul4RegClassID}; |
1361 |
AArch64::ZPR4Mul4RegClassID}; |
| 1362 |
static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, |
1362 |
static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, |
| 1363 |
AArch64::zsub2, AArch64::zsub3}; |
1363 |
AArch64::zsub2, AArch64::zsub3}; |
| 1364 |
return createTuple(Regs, RegClassIDs, SubRegs); |
1364 |
return createTuple(Regs, RegClassIDs, SubRegs); |
| 1365 |
} |
1365 |
} |
| 1366 |
|
1366 |
|
| 1367 |
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef Regs, |
1367 |
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef Regs, |
| 1368 |
const unsigned RegClassIDs[], |
1368 |
const unsigned RegClassIDs[], |
| 1369 |
const unsigned SubRegs[]) { |
1369 |
const unsigned SubRegs[]) { |
| 1370 |
// There's no special register-class for a vector-list of 1 element: it's just |
1370 |
// There's no special register-class for a vector-list of 1 element: it's just |
| 1371 |
// a vector. |
1371 |
// a vector. |
| 1372 |
if (Regs.size() == 1) |
1372 |
if (Regs.size() == 1) |
| 1373 |
return Regs[0]; |
1373 |
return Regs[0]; |
| 1374 |
|
1374 |
|
| 1375 |
assert(Regs.size() >= 2 && Regs.size() <= 4); |
1375 |
assert(Regs.size() >= 2 && Regs.size() <= 4); |
| 1376 |
|
1376 |
|
| 1377 |
SDLoc DL(Regs[0]); |
1377 |
SDLoc DL(Regs[0]); |
| 1378 |
|
1378 |
|
| 1379 |
SmallVector Ops; |
1379 |
SmallVector Ops; |
| 1380 |
|
1380 |
|
| 1381 |
// First operand of REG_SEQUENCE is the desired RegClass. |
1381 |
// First operand of REG_SEQUENCE is the desired RegClass. |
| 1382 |
Ops.push_back( |
1382 |
Ops.push_back( |
| 1383 |
CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); |
1383 |
CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); |
| 1384 |
|
1384 |
|
| 1385 |
// Then we get pairs of source & subregister-position for the components. |
1385 |
// Then we get pairs of source & subregister-position for the components. |
| 1386 |
for (unsigned i = 0; i < Regs.size(); ++i) { |
1386 |
for (unsigned i = 0; i < Regs.size(); ++i) { |
| 1387 |
Ops.push_back(Regs[i]); |
1387 |
Ops.push_back(Regs[i]); |
| 1388 |
Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); |
1388 |
Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); |
| 1389 |
} |
1389 |
} |
| 1390 |
|
1390 |
|
| 1391 |
SDNode *N = |
1391 |
SDNode *N = |
| 1392 |
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); |
1392 |
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); |
| 1393 |
return SDValue(N, 0); |
1393 |
return SDValue(N, 0); |
| 1394 |
} |
1394 |
} |
| 1395 |
|
1395 |
|
| 1396 |
void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, |
1396 |
void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, |
| 1397 |
bool isExt) { |
1397 |
bool isExt) { |
| 1398 |
SDLoc dl(N); |
1398 |
SDLoc dl(N); |
| 1399 |
EVT VT = N->getValueType(0); |
1399 |
EVT VT = N->getValueType(0); |
| 1400 |
|
1400 |
|
| 1401 |
unsigned ExtOff = isExt; |
1401 |
unsigned ExtOff = isExt; |
| 1402 |
|
1402 |
|
| 1403 |
// Form a REG_SEQUENCE to force register allocation. |
1403 |
// Form a REG_SEQUENCE to force register allocation. |
| 1404 |
unsigned Vec0Off = ExtOff + 1; |
1404 |
unsigned Vec0Off = ExtOff + 1; |
| 1405 |
SmallVector Regs(N->op_begin() + Vec0Off, |
1405 |
SmallVector Regs(N->op_begin() + Vec0Off, |
| 1406 |
N->op_begin() + Vec0Off + NumVecs); |
1406 |
N->op_begin() + Vec0Off + NumVecs); |
| 1407 |
SDValue RegSeq = createQTuple(Regs); |
1407 |
SDValue RegSeq = createQTuple(Regs); |
| 1408 |
|
1408 |
|
| 1409 |
SmallVector Ops; |
1409 |
SmallVector Ops; |
| 1410 |
if (isExt) |
1410 |
if (isExt) |
| 1411 |
Ops.push_back(N->getOperand(1)); |
1411 |
Ops.push_back(N->getOperand(1)); |
| 1412 |
Ops.push_back(RegSeq); |
1412 |
Ops.push_back(RegSeq); |
| 1413 |
Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); |
1413 |
Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); |
| 1414 |
ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); |
1414 |
ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); |
| 1415 |
} |
1415 |
} |
| 1416 |
|
1416 |
|
| 1417 |
bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { |
1417 |
bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { |
| 1418 |
LoadSDNode *LD = cast(N); |
1418 |
LoadSDNode *LD = cast(N); |
| 1419 |
if (LD->isUnindexed()) |
1419 |
if (LD->isUnindexed()) |
| 1420 |
return false; |
1420 |
return false; |
| 1421 |
EVT VT = LD->getMemoryVT(); |
1421 |
EVT VT = LD->getMemoryVT(); |
| 1422 |
EVT DstVT = N->getValueType(0); |
1422 |
EVT DstVT = N->getValueType(0); |
| 1423 |
ISD::MemIndexedMode AM = LD->getAddressingMode(); |
1423 |
ISD::MemIndexedMode AM = LD->getAddressingMode(); |
| 1424 |
bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; |
1424 |
bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; |
| 1425 |
|
1425 |
|
| 1426 |
// We're not doing validity checking here. That was done when checking |
1426 |
// We're not doing validity checking here. That was done when checking |
| 1427 |
// if we should mark the load as indexed or not. We're just selecting |
1427 |
// if we should mark the load as indexed or not. We're just selecting |
| 1428 |
// the right instruction. |
1428 |
// the right instruction. |
| 1429 |
unsigned Opcode = 0; |
1429 |
unsigned Opcode = 0; |
| 1430 |
|
1430 |
|
| 1431 |
ISD::LoadExtType ExtType = LD->getExtensionType(); |
1431 |
ISD::LoadExtType ExtType = LD->getExtensionType(); |
| 1432 |
bool InsertTo64 = false; |
1432 |
bool InsertTo64 = false; |
| 1433 |
if (VT == MVT::i64) |
1433 |
if (VT == MVT::i64) |
| 1434 |
Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; |
1434 |
Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; |
| 1435 |
else if (VT == MVT::i32) { |
1435 |
else if (VT == MVT::i32) { |
| 1436 |
if (ExtType == ISD::NON_EXTLOAD) |
1436 |
if (ExtType == ISD::NON_EXTLOAD) |
| 1437 |
Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; |
1437 |
Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; |
| 1438 |
else if (ExtType == ISD::SEXTLOAD) |
1438 |
else if (ExtType == ISD::SEXTLOAD) |
| 1439 |
Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; |
1439 |
Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; |
| 1440 |
else { |
1440 |
else { |
| 1441 |
Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; |
1441 |
Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; |
| 1442 |
InsertTo64 = true; |
1442 |
InsertTo64 = true; |
| 1443 |
// The result of the load is only i32. It's the subreg_to_reg that makes |
1443 |
// The result of the load is only i32. It's the subreg_to_reg that makes |
| 1444 |
// it into an i64. |
1444 |
// it into an i64. |
| 1445 |
DstVT = MVT::i32; |
1445 |
DstVT = MVT::i32; |
| 1446 |
} |
1446 |
} |
| 1447 |
} else if (VT == MVT::i16) { |
1447 |
} else if (VT == MVT::i16) { |
| 1448 |
if (ExtType == ISD::SEXTLOAD) { |
1448 |
if (ExtType == ISD::SEXTLOAD) { |
| 1449 |
if (DstVT == MVT::i64) |
1449 |
if (DstVT == MVT::i64) |
| 1450 |
Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; |
1450 |
Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; |
| 1451 |
else |
1451 |
else |
| 1452 |
Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; |
1452 |
Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; |
| 1453 |
} else { |
1453 |
} else { |
| 1454 |
Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; |
1454 |
Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; |
| 1455 |
InsertTo64 = DstVT == MVT::i64; |
1455 |
InsertTo64 = DstVT == MVT::i64; |
| 1456 |
// The result of the load is only i32. It's the subreg_to_reg that makes |
1456 |
// The result of the load is only i32. It's the subreg_to_reg that makes |
| 1457 |
// it into an i64. |
1457 |
// it into an i64. |
| 1458 |
DstVT = MVT::i32; |
1458 |
DstVT = MVT::i32; |
| 1459 |
} |
1459 |
} |
| 1460 |
} else if (VT == MVT::i8) { |
1460 |
} else if (VT == MVT::i8) { |
| 1461 |
if (ExtType == ISD::SEXTLOAD) { |
1461 |
if (ExtType == ISD::SEXTLOAD) { |
| 1462 |
if (DstVT == MVT::i64) |
1462 |
if (DstVT == MVT::i64) |
| 1463 |
Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; |
1463 |
Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; |
| 1464 |
else |
1464 |
else |
| 1465 |
Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; |
1465 |
Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; |
| 1466 |
} else { |
1466 |
} else { |
| 1467 |
Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; |
1467 |
Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; |
| 1468 |
InsertTo64 = DstVT == MVT::i64; |
1468 |
InsertTo64 = DstVT == MVT::i64; |
| 1469 |
// The result of the load is only i32. It's the subreg_to_reg that makes |
1469 |
// The result of the load is only i32. It's the subreg_to_reg that makes |
| 1470 |
// it into an i64. |
1470 |
// it into an i64. |
| 1471 |
DstVT = MVT::i32; |
1471 |
DstVT = MVT::i32; |
| 1472 |
} |
1472 |
} |
| 1473 |
} else if (VT == MVT::f16) { |
1473 |
} else if (VT == MVT::f16) { |
| 1474 |
Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; |
1474 |
Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; |
| 1475 |
} else if (VT == MVT::bf16) { |
1475 |
} else if (VT == MVT::bf16) { |
| 1476 |
Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; |
1476 |
Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; |
| 1477 |
} else if (VT == MVT::f32) { |
1477 |
} else if (VT == MVT::f32) { |
| 1478 |
Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; |
1478 |
Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; |
| 1479 |
} else if (VT == MVT::f64 || VT.is64BitVector()) { |
1479 |
} else if (VT == MVT::f64 || VT.is64BitVector()) { |
| 1480 |
Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; |
1480 |
Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; |
| 1481 |
} else if (VT.is128BitVector()) { |
1481 |
} else if (VT.is128BitVector()) { |
| 1482 |
Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; |
1482 |
Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; |
| 1483 |
} else |
1483 |
} else |
| 1484 |
return false; |
1484 |
return false; |
| 1485 |
SDValue Chain = LD->getChain(); |
1485 |
SDValue Chain = LD->getChain(); |
| 1486 |
SDValue Base = LD->getBasePtr(); |
1486 |
SDValue Base = LD->getBasePtr(); |
| 1487 |
ConstantSDNode *OffsetOp = cast(LD->getOffset()); |
1487 |
ConstantSDNode *OffsetOp = cast(LD->getOffset()); |
| 1488 |
int OffsetVal = (int)OffsetOp->getZExtValue(); |
1488 |
int OffsetVal = (int)OffsetOp->getZExtValue(); |
| 1489 |
SDLoc dl(N); |
1489 |
SDLoc dl(N); |
| 1490 |
SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); |
1490 |
SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); |
| 1491 |
SDValue Ops[] = { Base, Offset, Chain }; |
1491 |
SDValue Ops[] = { Base, Offset, Chain }; |
| 1492 |
SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, |
1492 |
SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, |
| 1493 |
MVT::Other, Ops); |
1493 |
MVT::Other, Ops); |
| 1494 |
|
1494 |
|
| 1495 |
// Transfer memoperands. |
1495 |
// Transfer memoperands. |
| 1496 |
MachineMemOperand *MemOp = cast(N)->getMemOperand(); |
1496 |
MachineMemOperand *MemOp = cast(N)->getMemOperand(); |
| 1497 |
CurDAG->setNodeMemRefs(cast(Res), {MemOp}); |
1497 |
CurDAG->setNodeMemRefs(cast(Res), {MemOp}); |
| 1498 |
|
1498 |
|
| 1499 |
// Either way, we're replacing the node, so tell the caller that. |
1499 |
// Either way, we're replacing the node, so tell the caller that. |
| 1500 |
SDValue LoadedVal = SDValue(Res, 1); |
1500 |
SDValue LoadedVal = SDValue(Res, 1); |
| 1501 |
if (InsertTo64) { |
1501 |
if (InsertTo64) { |
| 1502 |
SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); |
1502 |
SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); |
| 1503 |
LoadedVal = |
1503 |
LoadedVal = |
| 1504 |
SDValue(CurDAG->getMachineNode( |
1504 |
SDValue(CurDAG->getMachineNode( |
| 1505 |
AArch64::SUBREG_TO_REG, dl, MVT::i64, |
1505 |
AArch64::SUBREG_TO_REG, dl, MVT::i64, |
| 1506 |
CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, |
1506 |
CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, |
| 1507 |
SubReg), |
1507 |
SubReg), |
| 1508 |
0); |
1508 |
0); |
| 1509 |
} |
1509 |
} |
| 1510 |
|
1510 |
|
| 1511 |
ReplaceUses(SDValue(N, 0), LoadedVal); |
1511 |
ReplaceUses(SDValue(N, 0), LoadedVal); |
| 1512 |
ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); |
1512 |
ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); |
| 1513 |
ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); |
1513 |
ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); |
| 1514 |
CurDAG->RemoveDeadNode(N); |
1514 |
CurDAG->RemoveDeadNode(N); |
| 1515 |
return true; |
1515 |
return true; |
| 1516 |
} |
1516 |
} |
| 1517 |
|
1517 |
|
| 1518 |
void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, |
1518 |
void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, |
| 1519 |
unsigned SubRegIdx) { |
1519 |
unsigned SubRegIdx) { |
| 1520 |
SDLoc dl(N); |
1520 |
SDLoc dl(N); |
| 1521 |
EVT VT = N->getValueType(0); |
1521 |
EVT VT = N->getValueType(0); |
| 1522 |
SDValue Chain = N->getOperand(0); |
1522 |
SDValue Chain = N->getOperand(0); |
| 1523 |
|
1523 |
|
| 1524 |
SDValue Ops[] = {N->getOperand(2), // Mem operand; |
1524 |
SDValue Ops[] = {N->getOperand(2), // Mem operand; |
| 1525 |
Chain}; |
1525 |
Chain}; |
| 1526 |
|
1526 |
|
| 1527 |
const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
1527 |
const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
| 1528 |
|
1528 |
|
| 1529 |
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
1529 |
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| 1530 |
SDValue SuperReg = SDValue(Ld, 0); |
1530 |
SDValue SuperReg = SDValue(Ld, 0); |
| 1531 |
for (unsigned i = 0; i < NumVecs; ++i) |
1531 |
for (unsigned i = 0; i < NumVecs; ++i) |
| 1532 |
ReplaceUses(SDValue(N, i), |
1532 |
ReplaceUses(SDValue(N, i), |
| 1533 |
CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); |
1533 |
CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); |
| 1534 |
|
1534 |
|
| 1535 |
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); |
1535 |
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); |
| 1536 |
|
1536 |
|
| 1537 |
// Transfer memoperands. In the case of AArch64::LD64B, there won't be one, |
1537 |
// Transfer memoperands. In the case of AArch64::LD64B, there won't be one, |
| 1538 |
// because it's too simple to have needed special treatment during lowering. |
1538 |
// because it's too simple to have needed special treatment during lowering. |
| 1539 |
if (auto *MemIntr = dyn_cast(N)) { |
1539 |
if (auto *MemIntr = dyn_cast(N)) { |
| 1540 |
MachineMemOperand *MemOp = MemIntr->getMemOperand(); |
1540 |
MachineMemOperand *MemOp = MemIntr->getMemOperand(); |
| 1541 |
CurDAG->setNodeMemRefs(cast(Ld), {MemOp}); |
1541 |
CurDAG->setNodeMemRefs(cast(Ld), {MemOp}); |
| 1542 |
} |
1542 |
} |
| 1543 |
|
1543 |
|
| 1544 |
CurDAG->RemoveDeadNode(N); |
1544 |
CurDAG->RemoveDeadNode(N); |
| 1545 |
} |
1545 |
} |
| 1546 |
|
1546 |
|
| 1547 |
void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, |
1547 |
void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, |
| 1548 |
unsigned Opc, unsigned SubRegIdx) { |
1548 |
unsigned Opc, unsigned SubRegIdx) { |
| 1549 |
SDLoc dl(N); |
1549 |
SDLoc dl(N); |
| 1550 |
EVT VT = N->getValueType(0); |
1550 |
EVT VT = N->getValueType(0); |
| 1551 |
SDValue Chain = N->getOperand(0); |
1551 |
SDValue Chain = N->getOperand(0); |
| 1552 |
|
1552 |
|
| 1553 |
SDValue Ops[] = {N->getOperand(1), // Mem operand |
1553 |
SDValue Ops[] = {N->getOperand(1), // Mem operand |
| 1554 |
N->getOperand(2), // Incremental |
1554 |
N->getOperand(2), // Incremental |
| 1555 |
Chain}; |
1555 |
Chain}; |
| 1556 |
|
1556 |
|
| 1557 |
const EVT ResTys[] = {MVT::i64, // Type of the write back register |
1557 |
const EVT ResTys[] = {MVT::i64, // Type of the write back register |
| 1558 |
MVT::Untyped, MVT::Other}; |
1558 |
MVT::Untyped, MVT::Other}; |
| 1559 |
|
1559 |
|
| 1560 |
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
1560 |
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| 1561 |
|
1561 |
|
| 1562 |
// Update uses of write back register |
1562 |
// Update uses of write back register |
| 1563 |
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); |
1563 |
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); |
| 1564 |
|
1564 |
|
| 1565 |
// Update uses of vector list |
1565 |
// Update uses of vector list |
| 1566 |
SDValue SuperReg = SDValue(Ld, 1); |
1566 |
SDValue SuperReg = SDValue(Ld, 1); |
| 1567 |
if (NumVecs == 1) |
1567 |
if (NumVecs == 1) |
| 1568 |
ReplaceUses(SDValue(N, 0), SuperReg); |
1568 |
ReplaceUses(SDValue(N, 0), SuperReg); |
| 1569 |
else |
1569 |
else |
| 1570 |
for (unsigned i = 0; i < NumVecs; ++i) |
1570 |
for (unsigned i = 0; i < NumVecs; ++i) |
| 1571 |
ReplaceUses(SDValue(N, i), |
1571 |
ReplaceUses(SDValue(N, i), |
| 1572 |
CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); |
1572 |
CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); |
| 1573 |
|
1573 |
|
| 1574 |
// Update the chain |
1574 |
// Update the chain |
| 1575 |
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); |
1575 |
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); |
| 1576 |
CurDAG->RemoveDeadNode(N); |
1576 |
CurDAG->RemoveDeadNode(N); |
| 1577 |
} |
1577 |
} |
| 1578 |
|
1578 |
|
| 1579 |
/// Optimize \param OldBase and \param OldOffset selecting the best addressing |
1579 |
/// Optimize \param OldBase and \param OldOffset selecting the best addressing |
| 1580 |
/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the |
1580 |
/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the |
| 1581 |
/// new Base and an SDValue representing the new offset. |
1581 |
/// new Base and an SDValue representing the new offset. |
| 1582 |
std::tuple |
1582 |
std::tuple |
| 1583 |
AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, |
1583 |
AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, |
| 1584 |
unsigned Opc_ri, |
1584 |
unsigned Opc_ri, |
| 1585 |
const SDValue &OldBase, |
1585 |
const SDValue &OldBase, |
| 1586 |
const SDValue &OldOffset, |
1586 |
const SDValue &OldOffset, |
| 1587 |
unsigned Scale) { |
1587 |
unsigned Scale) { |
| 1588 |
SDValue NewBase = OldBase; |
1588 |
SDValue NewBase = OldBase; |
| 1589 |
SDValue NewOffset = OldOffset; |
1589 |
SDValue NewOffset = OldOffset; |
| 1590 |
// Detect a possible Reg+Imm addressing mode. |
1590 |
// Detect a possible Reg+Imm addressing mode. |
| 1591 |
const bool IsRegImm = SelectAddrModeIndexedSVE*Min=*/-8, /*Max=*/7>( |
1591 |
const bool IsRegImm = SelectAddrModeIndexedSVE*Min=*/-8, /*Max=*/7>( |
| 1592 |
N, OldBase, NewBase, NewOffset); |
1592 |
N, OldBase, NewBase, NewOffset); |
| 1593 |
|
1593 |
|
| 1594 |
// Detect a possible reg+reg addressing mode, but only if we haven't already |
1594 |
// Detect a possible reg+reg addressing mode, but only if we haven't already |
| 1595 |
// detected a Reg+Imm one. |
1595 |
// detected a Reg+Imm one. |
| 1596 |
const bool IsRegReg = |
1596 |
const bool IsRegReg = |
| 1597 |
!IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); |
1597 |
!IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); |
| 1598 |
|
1598 |
|
| 1599 |
// Select the instruction. |
1599 |
// Select the instruction. |
| 1600 |
return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); |
1600 |
return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); |
| 1601 |
} |
1601 |
} |
| 1602 |
|
1602 |
|
| 1603 |
enum class SelectTypeKind { |
1603 |
enum class SelectTypeKind { |
| 1604 |
Int1 = 0, |
1604 |
Int1 = 0, |
| 1605 |
Int = 1, |
1605 |
Int = 1, |
| 1606 |
FP = 2, |
1606 |
FP = 2, |
| 1607 |
AnyType = 3, |
1607 |
AnyType = 3, |
| 1608 |
}; |
1608 |
}; |
| 1609 |
|
1609 |
|
| 1610 |
/// This function selects an opcode from a list of opcodes, which is |
1610 |
/// This function selects an opcode from a list of opcodes, which is |
| 1611 |
/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit } |
1611 |
/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit } |
| 1612 |
/// element types, in this order. |
1612 |
/// element types, in this order. |
| 1613 |
template |
1613 |
template |
| 1614 |
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef Opcodes) { |
1614 |
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef Opcodes) { |
| 1615 |
// Only match scalable vector VTs |
1615 |
// Only match scalable vector VTs |
| 1616 |
if (!VT.isScalableVector()) |
1616 |
if (!VT.isScalableVector()) |
| 1617 |
return 0; |
1617 |
return 0; |
| 1618 |
|
1618 |
|
| 1619 |
EVT EltVT = VT.getVectorElementType(); |
1619 |
EVT EltVT = VT.getVectorElementType(); |
| 1620 |
switch (Kind) { |
1620 |
switch (Kind) { |
| 1621 |
case SelectTypeKind::AnyType: |
1621 |
case SelectTypeKind::AnyType: |
| 1622 |
break; |
1622 |
break; |
| 1623 |
case SelectTypeKind::Int: |
1623 |
case SelectTypeKind::Int: |
| 1624 |
if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 && |
1624 |
if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 && |
| 1625 |
EltVT != MVT::i64) |
1625 |
EltVT != MVT::i64) |
| 1626 |
return 0; |
1626 |
return 0; |
| 1627 |
break; |
1627 |
break; |
| 1628 |
case SelectTypeKind::Int1: |
1628 |
case SelectTypeKind::Int1: |
| 1629 |
if (EltVT != MVT::i1) |
1629 |
if (EltVT != MVT::i1) |
| 1630 |
return 0; |
1630 |
return 0; |
| 1631 |
break; |
1631 |
break; |
| 1632 |
case SelectTypeKind::FP: |
1632 |
case SelectTypeKind::FP: |
| 1633 |
if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64) |
1633 |
if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64) |
| 1634 |
return 0; |
1634 |
return 0; |
| 1635 |
break; |
1635 |
break; |
| 1636 |
} |
1636 |
} |
| 1637 |
|
1637 |
|
| 1638 |
unsigned Offset; |
1638 |
unsigned Offset; |
| 1639 |
switch (VT.getVectorMinNumElements()) { |
1639 |
switch (VT.getVectorMinNumElements()) { |
| 1640 |
case 16: // 8-bit |
1640 |
case 16: // 8-bit |
| 1641 |
Offset = 0; |
1641 |
Offset = 0; |
| 1642 |
break; |
1642 |
break; |
| 1643 |
case 8: // 16-bit |
1643 |
case 8: // 16-bit |
| 1644 |
Offset = 1; |
1644 |
Offset = 1; |
| 1645 |
break; |
1645 |
break; |
| 1646 |
case 4: // 32-bit |
1646 |
case 4: // 32-bit |
| 1647 |
Offset = 2; |
1647 |
Offset = 2; |
| 1648 |
break; |
1648 |
break; |
| 1649 |
case 2: // 64-bit |
1649 |
case 2: // 64-bit |
| 1650 |
Offset = 3; |
1650 |
Offset = 3; |
| 1651 |
break; |
1651 |
break; |
| 1652 |
default: |
1652 |
default: |
| 1653 |
return 0; |
1653 |
return 0; |
| 1654 |
} |
1654 |
} |
| 1655 |
|
1655 |
|
| 1656 |
return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset]; |
1656 |
return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset]; |
| 1657 |
} |
1657 |
} |
| 1658 |
|
1658 |
|
| 1659 |
// This function is almost identical to SelectWhilePair, but has an |
1659 |
// This function is almost identical to SelectWhilePair, but has an |
| 1660 |
// extra check on the range of the immediate operand. |
1660 |
// extra check on the range of the immediate operand. |
| 1661 |
// TODO: Merge these two functions together at some point? |
1661 |
// TODO: Merge these two functions together at some point? |
| 1662 |
void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) { |
1662 |
void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) { |
| 1663 |
// Immediate can be either 0 or 1. |
1663 |
// Immediate can be either 0 or 1. |
| 1664 |
if (ConstantSDNode *Imm = dyn_cast(N->getOperand(2))) |
1664 |
if (ConstantSDNode *Imm = dyn_cast(N->getOperand(2))) |
| 1665 |
if (Imm->getZExtValue() > 1) |
1665 |
if (Imm->getZExtValue() > 1) |
| 1666 |
return; |
1666 |
return; |
| 1667 |
|
1667 |
|
| 1668 |
SDLoc DL(N); |
1668 |
SDLoc DL(N); |
| 1669 |
EVT VT = N->getValueType(0); |
1669 |
EVT VT = N->getValueType(0); |
| 1670 |
SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; |
1670 |
SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; |
| 1671 |
SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); |
1671 |
SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); |
| 1672 |
SDValue SuperReg = SDValue(WhilePair, 0); |
1672 |
SDValue SuperReg = SDValue(WhilePair, 0); |
| 1673 |
|
1673 |
|
| 1674 |
for (unsigned I = 0; I < 2; ++I) |
1674 |
for (unsigned I = 0; I < 2; ++I) |
| 1675 |
ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( |
1675 |
ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( |
| 1676 |
AArch64::psub0 + I, DL, VT, SuperReg)); |
1676 |
AArch64::psub0 + I, DL, VT, SuperReg)); |
| 1677 |
|
1677 |
|
| 1678 |
CurDAG->RemoveDeadNode(N); |
1678 |
CurDAG->RemoveDeadNode(N); |
| 1679 |
} |
1679 |
} |
| 1680 |
|
1680 |
|
| 1681 |
void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) { |
1681 |
void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) { |
| 1682 |
SDLoc DL(N); |
1682 |
SDLoc DL(N); |
| 1683 |
EVT VT = N->getValueType(0); |
1683 |
EVT VT = N->getValueType(0); |
| 1684 |
|
1684 |
|
| 1685 |
SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; |
1685 |
SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; |
| 1686 |
|
1686 |
|
| 1687 |
SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); |
1687 |
SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); |
| 1688 |
SDValue SuperReg = SDValue(WhilePair, 0); |
1688 |
SDValue SuperReg = SDValue(WhilePair, 0); |
| 1689 |
|
1689 |
|
| 1690 |
for (unsigned I = 0; I < 2; ++I) |
1690 |
for (unsigned I = 0; I < 2; ++I) |
| 1691 |
ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( |
1691 |
ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( |
| 1692 |
AArch64::psub0 + I, DL, VT, SuperReg)); |
1692 |
AArch64::psub0 + I, DL, VT, SuperReg)); |
| 1693 |
|
1693 |
|
| 1694 |
CurDAG->RemoveDeadNode(N); |
1694 |
CurDAG->RemoveDeadNode(N); |
| 1695 |
} |
1695 |
} |
| 1696 |
|
1696 |
|
| 1697 |
void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, |
1697 |
void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, |
| 1698 |
unsigned Opcode) { |
1698 |
unsigned Opcode) { |
| 1699 |
EVT VT = N->getValueType(0); |
1699 |
EVT VT = N->getValueType(0); |
| 1700 |
SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
1700 |
SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
| 1701 |
SDValue Ops = createZTuple(Regs); |
1701 |
SDValue Ops = createZTuple(Regs); |
| 1702 |
SDLoc DL(N); |
1702 |
SDLoc DL(N); |
| 1703 |
SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops); |
1703 |
SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops); |
| 1704 |
SDValue SuperReg = SDValue(Intrinsic, 0); |
1704 |
SDValue SuperReg = SDValue(Intrinsic, 0); |
| 1705 |
for (unsigned i = 0; i < NumVecs; ++i) |
1705 |
for (unsigned i = 0; i < NumVecs; ++i) |
| 1706 |
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
1706 |
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
| 1707 |
AArch64::zsub0 + i, DL, VT, SuperReg)); |
1707 |
AArch64::zsub0 + i, DL, VT, SuperReg)); |
| 1708 |
|
1708 |
|
| 1709 |
CurDAG->RemoveDeadNode(N); |
1709 |
CurDAG->RemoveDeadNode(N); |
| 1710 |
} |
1710 |
} |
| 1711 |
|
1711 |
|
| 1712 |
void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, |
1712 |
void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, |
| 1713 |
unsigned NumVecs, |
1713 |
unsigned NumVecs, |
| 1714 |
bool IsZmMulti, |
1714 |
bool IsZmMulti, |
| 1715 |
unsigned Opcode, |
1715 |
unsigned Opcode, |
| 1716 |
bool HasPred) { |
1716 |
bool HasPred) { |
| 1717 |
assert(Opcode != 0 && "Unexpected opcode"); |
1717 |
assert(Opcode != 0 && "Unexpected opcode"); |
| 1718 |
|
1718 |
|
| 1719 |
SDLoc DL(N); |
1719 |
SDLoc DL(N); |
| 1720 |
EVT VT = N->getValueType(0); |
1720 |
EVT VT = N->getValueType(0); |
| 1721 |
unsigned FirstVecIdx = HasPred ? 2 : 1; |
1721 |
unsigned FirstVecIdx = HasPred ? 2 : 1; |
| 1722 |
|
1722 |
|
| 1723 |
auto GetMultiVecOperand = [=](unsigned StartIdx) { |
1723 |
auto GetMultiVecOperand = [=](unsigned StartIdx) { |
| 1724 |
SmallVector Regs(N->op_begin() + StartIdx, |
1724 |
SmallVector Regs(N->op_begin() + StartIdx, |
| 1725 |
N->op_begin() + StartIdx + NumVecs); |
1725 |
N->op_begin() + StartIdx + NumVecs); |
| 1726 |
return createZMulTuple(Regs); |
1726 |
return createZMulTuple(Regs); |
| 1727 |
}; |
1727 |
}; |
| 1728 |
|
1728 |
|
| 1729 |
SDValue Zdn = GetMultiVecOperand(FirstVecIdx); |
1729 |
SDValue Zdn = GetMultiVecOperand(FirstVecIdx); |
| 1730 |
|
1730 |
|
| 1731 |
SDValue Zm; |
1731 |
SDValue Zm; |
| 1732 |
if (IsZmMulti) |
1732 |
if (IsZmMulti) |
| 1733 |
Zm = GetMultiVecOperand(NumVecs + FirstVecIdx); |
1733 |
Zm = GetMultiVecOperand(NumVecs + FirstVecIdx); |
| 1734 |
else |
1734 |
else |
| 1735 |
Zm = N->getOperand(NumVecs + FirstVecIdx); |
1735 |
Zm = N->getOperand(NumVecs + FirstVecIdx); |
| 1736 |
|
1736 |
|
| 1737 |
SDNode *Intrinsic; |
1737 |
SDNode *Intrinsic; |
| 1738 |
if (HasPred) |
1738 |
if (HasPred) |
| 1739 |
Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, |
1739 |
Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, |
| 1740 |
N->getOperand(1), Zdn, Zm); |
1740 |
N->getOperand(1), Zdn, Zm); |
| 1741 |
else |
1741 |
else |
| 1742 |
Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm); |
1742 |
Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm); |
| 1743 |
SDValue SuperReg = SDValue(Intrinsic, 0); |
1743 |
SDValue SuperReg = SDValue(Intrinsic, 0); |
| 1744 |
for (unsigned i = 0; i < NumVecs; ++i) |
1744 |
for (unsigned i = 0; i < NumVecs; ++i) |
| 1745 |
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
1745 |
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
| 1746 |
AArch64::zsub0 + i, DL, VT, SuperReg)); |
1746 |
AArch64::zsub0 + i, DL, VT, SuperReg)); |
| 1747 |
|
1747 |
|
| 1748 |
CurDAG->RemoveDeadNode(N); |
1748 |
CurDAG->RemoveDeadNode(N); |
| 1749 |
} |
1749 |
} |
| 1750 |
|
1750 |
|
| 1751 |
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, |
1751 |
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, |
| 1752 |
unsigned Scale, unsigned Opc_ri, |
1752 |
unsigned Scale, unsigned Opc_ri, |
| 1753 |
unsigned Opc_rr, bool IsIntr) { |
1753 |
unsigned Opc_rr, bool IsIntr) { |
| 1754 |
assert(Scale < 4 && "Invalid scaling value."); |
1754 |
assert(Scale < 4 && "Invalid scaling value."); |
| 1755 |
SDLoc DL(N); |
1755 |
SDLoc DL(N); |
| 1756 |
EVT VT = N->getValueType(0); |
1756 |
EVT VT = N->getValueType(0); |
| 1757 |
SDValue Chain = N->getOperand(0); |
1757 |
SDValue Chain = N->getOperand(0); |
| 1758 |
|
1758 |
|
| 1759 |
// Optimize addressing mode. |
1759 |
// Optimize addressing mode. |
| 1760 |
SDValue Base, Offset; |
1760 |
SDValue Base, Offset; |
| 1761 |
unsigned Opc; |
1761 |
unsigned Opc; |
| 1762 |
std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( |
1762 |
std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( |
| 1763 |
N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2), |
1763 |
N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2), |
| 1764 |
CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); |
1764 |
CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); |
| 1765 |
|
1765 |
|
| 1766 |
SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate |
1766 |
SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate |
| 1767 |
Base, // Memory operand |
1767 |
Base, // Memory operand |
| 1768 |
Offset, Chain}; |
1768 |
Offset, Chain}; |
| 1769 |
|
1769 |
|
| 1770 |
const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
1770 |
const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
| 1771 |
|
1771 |
|
| 1772 |
SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); |
1772 |
SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); |
| 1773 |
SDValue SuperReg = SDValue(Load, 0); |
1773 |
SDValue SuperReg = SDValue(Load, 0); |
| 1774 |
for (unsigned i = 0; i < NumVecs; ++i) |
1774 |
for (unsigned i = 0; i < NumVecs; ++i) |
| 1775 |
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
1775 |
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
| 1776 |
AArch64::zsub0 + i, DL, VT, SuperReg)); |
1776 |
AArch64::zsub0 + i, DL, VT, SuperReg)); |
| 1777 |
|
1777 |
|
| 1778 |
// Copy chain |
1778 |
// Copy chain |
| 1779 |
unsigned ChainIdx = NumVecs; |
1779 |
unsigned ChainIdx = NumVecs; |
| 1780 |
ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); |
1780 |
ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); |
| 1781 |
CurDAG->RemoveDeadNode(N); |
1781 |
CurDAG->RemoveDeadNode(N); |
| 1782 |
} |
1782 |
} |
| 1783 |
|
1783 |
|
| 1784 |
void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N, |
1784 |
void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N, |
| 1785 |
unsigned NumVecs, |
1785 |
unsigned NumVecs, |
| 1786 |
unsigned Scale, |
1786 |
unsigned Scale, |
| 1787 |
unsigned Opc_ri, |
1787 |
unsigned Opc_ri, |
| 1788 |
unsigned Opc_rr) { |
1788 |
unsigned Opc_rr) { |
| 1789 |
assert(Scale < 4 && "Invalid scaling value."); |
1789 |
assert(Scale < 4 && "Invalid scaling value."); |
| 1790 |
SDLoc DL(N); |
1790 |
SDLoc DL(N); |
| 1791 |
EVT VT = N->getValueType(0); |
1791 |
EVT VT = N->getValueType(0); |
| 1792 |
SDValue Chain = N->getOperand(0); |
1792 |
SDValue Chain = N->getOperand(0); |
| 1793 |
|
1793 |
|
| 1794 |
SDValue PNg = N->getOperand(2); |
1794 |
SDValue PNg = N->getOperand(2); |
| 1795 |
SDValue Base = N->getOperand(3); |
1795 |
SDValue Base = N->getOperand(3); |
| 1796 |
SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); |
1796 |
SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); |
| 1797 |
unsigned Opc; |
1797 |
unsigned Opc; |
| 1798 |
std::tie(Opc, Base, Offset) = |
1798 |
std::tie(Opc, Base, Offset) = |
| 1799 |
findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale); |
1799 |
findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale); |
| 1800 |
|
1800 |
|
| 1801 |
SDValue Ops[] = {PNg, // Predicate-as-counter |
1801 |
SDValue Ops[] = {PNg, // Predicate-as-counter |
| 1802 |
Base, // Memory operand |
1802 |
Base, // Memory operand |
| 1803 |
Offset, Chain}; |
1803 |
Offset, Chain}; |
| 1804 |
|
1804 |
|
| 1805 |
const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
1805 |
const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
| 1806 |
|
1806 |
|
| 1807 |
SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); |
1807 |
SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); |
| 1808 |
SDValue SuperReg = SDValue(Load, 0); |
1808 |
SDValue SuperReg = SDValue(Load, 0); |
| 1809 |
for (unsigned i = 0; i < NumVecs; ++i) |
1809 |
for (unsigned i = 0; i < NumVecs; ++i) |
| 1810 |
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
1810 |
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
| 1811 |
AArch64::zsub0 + i, DL, VT, SuperReg)); |
1811 |
AArch64::zsub0 + i, DL, VT, SuperReg)); |
| 1812 |
|
1812 |
|
| 1813 |
// Copy chain |
1813 |
// Copy chain |
| 1814 |
unsigned ChainIdx = NumVecs; |
1814 |
unsigned ChainIdx = NumVecs; |
| 1815 |
ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); |
1815 |
ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); |
| 1816 |
CurDAG->RemoveDeadNode(N); |
1816 |
CurDAG->RemoveDeadNode(N); |
| 1817 |
} |
1817 |
} |
| 1818 |
|
1818 |
|
| 1819 |
void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, |
1819 |
void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, |
| 1820 |
unsigned Opcode) { |
1820 |
unsigned Opcode) { |
| 1821 |
if (N->getValueType(0) != MVT::nxv4f32) |
1821 |
if (N->getValueType(0) != MVT::nxv4f32) |
| 1822 |
return; |
1822 |
return; |
| 1823 |
SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode); |
1823 |
SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode); |
| 1824 |
} |
1824 |
} |
| 1825 |
|
1825 |
|
| 1826 |
void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs, |
1826 |
void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs, |
| 1827 |
unsigned Op) { |
1827 |
unsigned Op) { |
| 1828 |
SDLoc DL(N); |
1828 |
SDLoc DL(N); |
| 1829 |
EVT VT = N->getValueType(0); |
1829 |
EVT VT = N->getValueType(0); |
| 1830 |
|
1830 |
|
| 1831 |
SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
1831 |
SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
| 1832 |
SDValue Zd = createZMulTuple(Regs); |
1832 |
SDValue Zd = createZMulTuple(Regs); |
| 1833 |
SDValue Zn = N->getOperand(1 + NumVecs); |
1833 |
SDValue Zn = N->getOperand(1 + NumVecs); |
| 1834 |
SDValue Zm = N->getOperand(2 + NumVecs); |
1834 |
SDValue Zm = N->getOperand(2 + NumVecs); |
| 1835 |
|
1835 |
|
| 1836 |
SDValue Ops[] = {Zd, Zn, Zm}; |
1836 |
SDValue Ops[] = {Zd, Zn, Zm}; |
| 1837 |
|
1837 |
|
| 1838 |
SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops); |
1838 |
SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops); |
| 1839 |
SDValue SuperReg = SDValue(Intrinsic, 0); |
1839 |
SDValue SuperReg = SDValue(Intrinsic, 0); |
| 1840 |
for (unsigned i = 0; i < NumVecs; ++i) |
1840 |
for (unsigned i = 0; i < NumVecs; ++i) |
| 1841 |
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
1841 |
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( |
| 1842 |
AArch64::zsub0 + i, DL, VT, SuperReg)); |
1842 |
AArch64::zsub0 + i, DL, VT, SuperReg)); |
| 1843 |
|
1843 |
|
| 1844 |
CurDAG->RemoveDeadNode(N); |
1844 |
CurDAG->RemoveDeadNode(N); |
| 1845 |
} |
1845 |
} |
| 1846 |
|
1846 |
|
| 1847 |
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) { |
1847 |
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) { |
| 1848 |
switch (BaseReg) { |
1848 |
switch (BaseReg) { |
| 1849 |
default: |
1849 |
default: |
| 1850 |
return false; |
1850 |
return false; |
| 1851 |
case AArch64::ZA: |
1851 |
case AArch64::ZA: |
| 1852 |
case AArch64::ZAB0: |
1852 |
case AArch64::ZAB0: |
| 1853 |
if (TileNum == 0) |
1853 |
if (TileNum == 0) |
| 1854 |
break; |
1854 |
break; |
| 1855 |
return false; |
1855 |
return false; |
| 1856 |
case AArch64::ZAH0: |
1856 |
case AArch64::ZAH0: |
| 1857 |
if (TileNum <= 1) |
1857 |
if (TileNum <= 1) |
| 1858 |
break; |
1858 |
break; |
| 1859 |
return false; |
1859 |
return false; |
| 1860 |
case AArch64::ZAS0: |
1860 |
case AArch64::ZAS0: |
| 1861 |
if (TileNum <= 3) |
1861 |
if (TileNum <= 3) |
| 1862 |
break; |
1862 |
break; |
| 1863 |
return false; |
1863 |
return false; |
| 1864 |
case AArch64::ZAD0: |
1864 |
case AArch64::ZAD0: |
| 1865 |
if (TileNum <= 7) |
1865 |
if (TileNum <= 7) |
| 1866 |
break; |
1866 |
break; |
| 1867 |
return false; |
1867 |
return false; |
| 1868 |
} |
1868 |
} |
| 1869 |
|
1869 |
|
| 1870 |
BaseReg += TileNum; |
1870 |
BaseReg += TileNum; |
| 1871 |
return true; |
1871 |
return true; |
| 1872 |
} |
1872 |
} |
| 1873 |
|
1873 |
|
| 1874 |
template |
1874 |
template |
| 1875 |
void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, |
1875 |
void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, |
| 1876 |
unsigned BaseReg, unsigned Op) { |
1876 |
unsigned BaseReg, unsigned Op) { |
| 1877 |
unsigned TileNum = 0; |
1877 |
unsigned TileNum = 0; |
| 1878 |
if (BaseReg != AArch64::ZA) |
1878 |
if (BaseReg != AArch64::ZA) |
| 1879 |
TileNum = cast(N->getOperand(2))->getZExtValue(); |
1879 |
TileNum = cast(N->getOperand(2))->getZExtValue(); |
| 1880 |
|
1880 |
|
| 1881 |
if (!SelectSMETile(BaseReg, TileNum)) |
1881 |
if (!SelectSMETile(BaseReg, TileNum)) |
| 1882 |
return; |
1882 |
return; |
| 1883 |
|
1883 |
|
| 1884 |
SDValue SliceBase, Base, Offset; |
1884 |
SDValue SliceBase, Base, Offset; |
| 1885 |
if (BaseReg == AArch64::ZA) |
1885 |
if (BaseReg == AArch64::ZA) |
| 1886 |
SliceBase = N->getOperand(2); |
1886 |
SliceBase = N->getOperand(2); |
| 1887 |
else |
1887 |
else |
| 1888 |
SliceBase = N->getOperand(3); |
1888 |
SliceBase = N->getOperand(3); |
| 1889 |
|
1889 |
|
| 1890 |
if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale)) |
1890 |
if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale)) |
| 1891 |
return; |
1891 |
return; |
| 1892 |
|
1892 |
|
| 1893 |
SDLoc DL(N); |
1893 |
SDLoc DL(N); |
| 1894 |
SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other); |
1894 |
SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other); |
| 1895 |
SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)}; |
1895 |
SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)}; |
| 1896 |
SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops); |
1896 |
SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops); |
| 1897 |
|
1897 |
|
| 1898 |
EVT VT = N->getValueType(0); |
1898 |
EVT VT = N->getValueType(0); |
| 1899 |
for (unsigned I = 0; I < NumVecs; ++I) |
1899 |
for (unsigned I = 0; I < NumVecs; ++I) |
| 1900 |
ReplaceUses(SDValue(N, I), |
1900 |
ReplaceUses(SDValue(N, I), |
| 1901 |
CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT, |
1901 |
CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT, |
| 1902 |
SDValue(Mov, 0))); |
1902 |
SDValue(Mov, 0))); |
| 1903 |
// Copy chain |
1903 |
// Copy chain |
| 1904 |
unsigned ChainIdx = NumVecs; |
1904 |
unsigned ChainIdx = NumVecs; |
| 1905 |
ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1)); |
1905 |
ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1)); |
| 1906 |
CurDAG->RemoveDeadNode(N); |
1906 |
CurDAG->RemoveDeadNode(N); |
| 1907 |
} |
1907 |
} |
| 1908 |
|
1908 |
|
| 1909 |
void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N, |
1909 |
void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N, |
| 1910 |
unsigned NumOutVecs, |
1910 |
unsigned NumOutVecs, |
| 1911 |
bool IsTupleInput, |
1911 |
bool IsTupleInput, |
| 1912 |
unsigned Opc) { |
1912 |
unsigned Opc) { |
| 1913 |
SDLoc DL(N); |
1913 |
SDLoc DL(N); |
| 1914 |
EVT VT = N->getValueType(0); |
1914 |
EVT VT = N->getValueType(0); |
| 1915 |
unsigned NumInVecs = N->getNumOperands() - 1; |
1915 |
unsigned NumInVecs = N->getNumOperands() - 1; |
| 1916 |
|
1916 |
|
| 1917 |
SmallVector Ops; |
1917 |
SmallVector Ops; |
| 1918 |
if (IsTupleInput) { |
1918 |
if (IsTupleInput) { |
| 1919 |
assert((NumInVecs == 2 || NumInVecs == 4) && |
1919 |
assert((NumInVecs == 2 || NumInVecs == 4) && |
| 1920 |
"Don't know how to handle multi-register input!"); |
1920 |
"Don't know how to handle multi-register input!"); |
| 1921 |
SmallVector Regs(N->op_begin() + 1, |
1921 |
SmallVector Regs(N->op_begin() + 1, |
| 1922 |
N->op_begin() + 1 + NumInVecs); |
1922 |
N->op_begin() + 1 + NumInVecs); |
| 1923 |
Ops.push_back(createZMulTuple(Regs)); |
1923 |
Ops.push_back(createZMulTuple(Regs)); |
| 1924 |
} else { |
1924 |
} else { |
| 1925 |
// All intrinsic nodes have the ID as the first operand, hence the "1 + I". |
1925 |
// All intrinsic nodes have the ID as the first operand, hence the "1 + I". |
| 1926 |
for (unsigned I = 0; I < NumInVecs; I++) |
1926 |
for (unsigned I = 0; I < NumInVecs; I++) |
| 1927 |
Ops.push_back(N->getOperand(1 + I)); |
1927 |
Ops.push_back(N->getOperand(1 + I)); |
| 1928 |
} |
1928 |
} |
| 1929 |
|
1929 |
|
| 1930 |
SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); |
1930 |
SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); |
| 1931 |
SDValue SuperReg = SDValue(Res, 0); |
1931 |
SDValue SuperReg = SDValue(Res, 0); |
| 1932 |
|
1932 |
|
| 1933 |
for (unsigned I = 0; I < NumOutVecs; I++) |
1933 |
for (unsigned I = 0; I < NumOutVecs; I++) |
| 1934 |
ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( |
1934 |
ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( |
| 1935 |
AArch64::zsub0 + I, DL, VT, SuperReg)); |
1935 |
AArch64::zsub0 + I, DL, VT, SuperReg)); |
| 1936 |
CurDAG->RemoveDeadNode(N); |
1936 |
CurDAG->RemoveDeadNode(N); |
| 1937 |
} |
1937 |
} |
| 1938 |
|
1938 |
|
| 1939 |
void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, |
1939 |
void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, |
| 1940 |
unsigned Opc) { |
1940 |
unsigned Opc) { |
| 1941 |
SDLoc dl(N); |
1941 |
SDLoc dl(N); |
| 1942 |
EVT VT = N->getOperand(2)->getValueType(0); |
1942 |
EVT VT = N->getOperand(2)->getValueType(0); |
| 1943 |
|
1943 |
|
| 1944 |
// Form a REG_SEQUENCE to force register allocation. |
1944 |
// Form a REG_SEQUENCE to force register allocation. |
| 1945 |
bool Is128Bit = VT.getSizeInBits() == 128; |
1945 |
bool Is128Bit = VT.getSizeInBits() == 128; |
| 1946 |
SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
1946 |
SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
| 1947 |
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); |
1947 |
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); |
| 1948 |
|
1948 |
|
| 1949 |
SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; |
1949 |
SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; |
| 1950 |
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); |
1950 |
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); |
| 1951 |
|
1951 |
|
| 1952 |
// Transfer memoperands. |
1952 |
// Transfer memoperands. |
| 1953 |
MachineMemOperand *MemOp = cast(N)->getMemOperand(); |
1953 |
MachineMemOperand *MemOp = cast(N)->getMemOperand(); |
| 1954 |
CurDAG->setNodeMemRefs(cast(St), {MemOp}); |
1954 |
CurDAG->setNodeMemRefs(cast(St), {MemOp}); |
| 1955 |
|
1955 |
|
| 1956 |
ReplaceNode(N, St); |
1956 |
ReplaceNode(N, St); |
| 1957 |
} |
1957 |
} |
| 1958 |
|
1958 |
|
| 1959 |
void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, |
1959 |
void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, |
| 1960 |
unsigned Scale, unsigned Opc_rr, |
1960 |
unsigned Scale, unsigned Opc_rr, |
| 1961 |
unsigned Opc_ri) { |
1961 |
unsigned Opc_ri) { |
| 1962 |
SDLoc dl(N); |
1962 |
SDLoc dl(N); |
| 1963 |
|
1963 |
|
| 1964 |
// Form a REG_SEQUENCE to force register allocation. |
1964 |
// Form a REG_SEQUENCE to force register allocation. |
| 1965 |
SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
1965 |
SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
| 1966 |
SDValue RegSeq = createZTuple(Regs); |
1966 |
SDValue RegSeq = createZTuple(Regs); |
| 1967 |
|
1967 |
|
| 1968 |
// Optimize addressing mode. |
1968 |
// Optimize addressing mode. |
| 1969 |
unsigned Opc; |
1969 |
unsigned Opc; |
| 1970 |
SDValue Offset, Base; |
1970 |
SDValue Offset, Base; |
| 1971 |
std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( |
1971 |
std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( |
| 1972 |
N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), |
1972 |
N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), |
| 1973 |
CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); |
1973 |
CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); |
| 1974 |
|
1974 |
|
| 1975 |
SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate |
1975 |
SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate |
| 1976 |
Base, // address |
1976 |
Base, // address |
| 1977 |
Offset, // offset |
1977 |
Offset, // offset |
| 1978 |
N->getOperand(0)}; // chain |
1978 |
N->getOperand(0)}; // chain |
| 1979 |
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); |
1979 |
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); |
| 1980 |
|
1980 |
|
| 1981 |
ReplaceNode(N, St); |
1981 |
ReplaceNode(N, St); |
| 1982 |
} |
1982 |
} |
| 1983 |
|
1983 |
|
| 1984 |
bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, |
1984 |
bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, |
| 1985 |
SDValue &OffImm) { |
1985 |
SDValue &OffImm) { |
| 1986 |
SDLoc dl(N); |
1986 |
SDLoc dl(N); |
| 1987 |
const DataLayout &DL = CurDAG->getDataLayout(); |
1987 |
const DataLayout &DL = CurDAG->getDataLayout(); |
| 1988 |
const TargetLowering *TLI = getTargetLowering(); |
1988 |
const TargetLowering *TLI = getTargetLowering(); |
| 1989 |
|
1989 |
|
| 1990 |
// Try to match it for the frame address |
1990 |
// Try to match it for the frame address |
| 1991 |
if (auto FINode = dyn_cast(N)) { |
1991 |
if (auto FINode = dyn_cast(N)) { |
| 1992 |
int FI = FINode->getIndex(); |
1992 |
int FI = FINode->getIndex(); |
| 1993 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
1993 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| 1994 |
OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
1994 |
OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); |
| 1995 |
return true; |
1995 |
return true; |
| 1996 |
} |
1996 |
} |
| 1997 |
|
1997 |
|
| 1998 |
return false; |
1998 |
return false; |
| 1999 |
} |
1999 |
} |
| 2000 |
|
2000 |
|
| 2001 |
void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, |
2001 |
void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, |
| 2002 |
unsigned Opc) { |
2002 |
unsigned Opc) { |
| 2003 |
SDLoc dl(N); |
2003 |
SDLoc dl(N); |
| 2004 |
EVT VT = N->getOperand(2)->getValueType(0); |
2004 |
EVT VT = N->getOperand(2)->getValueType(0); |
| 2005 |
const EVT ResTys[] = {MVT::i64, // Type of the write back register |
2005 |
const EVT ResTys[] = {MVT::i64, // Type of the write back register |
| 2006 |
MVT::Other}; // Type for the Chain |
2006 |
MVT::Other}; // Type for the Chain |
| 2007 |
|
2007 |
|
| 2008 |
// Form a REG_SEQUENCE to force register allocation. |
2008 |
// Form a REG_SEQUENCE to force register allocation. |
| 2009 |
bool Is128Bit = VT.getSizeInBits() == 128; |
2009 |
bool Is128Bit = VT.getSizeInBits() == 128; |
| 2010 |
SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
2010 |
SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
| 2011 |
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); |
2011 |
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); |
| 2012 |
|
2012 |
|
| 2013 |
SDValue Ops[] = {RegSeq, |
2013 |
SDValue Ops[] = {RegSeq, |
| 2014 |
N->getOperand(NumVecs + 1), // base register |
2014 |
N->getOperand(NumVecs + 1), // base register |
| 2015 |
N->getOperand(NumVecs + 2), // Incremental |
2015 |
N->getOperand(NumVecs + 2), // Incremental |
| 2016 |
N->getOperand(0)}; // Chain |
2016 |
N->getOperand(0)}; // Chain |
| 2017 |
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
2017 |
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| 2018 |
|
2018 |
|
| 2019 |
ReplaceNode(N, St); |
2019 |
ReplaceNode(N, St); |
| 2020 |
} |
2020 |
} |
| 2021 |
|
2021 |
|
| 2022 |
namespace { |
2022 |
namespace { |
| 2023 |
/// WidenVector - Given a value in the V64 register class, produce the |
2023 |
/// WidenVector - Given a value in the V64 register class, produce the |
| 2024 |
/// equivalent value in the V128 register class. |
2024 |
/// equivalent value in the V128 register class. |
| 2025 |
class WidenVector { |
2025 |
class WidenVector { |
| 2026 |
SelectionDAG &DAG; |
2026 |
SelectionDAG &DAG; |
| 2027 |
|
2027 |
|
| 2028 |
public: |
2028 |
public: |
| 2029 |
WidenVector(SelectionDAG &DAG) : DAG(DAG) {} |
2029 |
WidenVector(SelectionDAG &DAG) : DAG(DAG) {} |
| 2030 |
|
2030 |
|
| 2031 |
SDValue operator()(SDValue V64Reg) { |
2031 |
SDValue operator()(SDValue V64Reg) { |
| 2032 |
EVT VT = V64Reg.getValueType(); |
2032 |
EVT VT = V64Reg.getValueType(); |
| 2033 |
unsigned NarrowSize = VT.getVectorNumElements(); |
2033 |
unsigned NarrowSize = VT.getVectorNumElements(); |
| 2034 |
MVT EltTy = VT.getVectorElementType().getSimpleVT(); |
2034 |
MVT EltTy = VT.getVectorElementType().getSimpleVT(); |
| 2035 |
MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); |
2035 |
MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); |
| 2036 |
SDLoc DL(V64Reg); |
2036 |
SDLoc DL(V64Reg); |
| 2037 |
|
2037 |
|
| 2038 |
SDValue Undef = |
2038 |
SDValue Undef = |
| 2039 |
SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); |
2039 |
SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); |
| 2040 |
return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); |
2040 |
return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); |
| 2041 |
} |
2041 |
} |
| 2042 |
}; |
2042 |
}; |
| 2043 |
} // namespace |
2043 |
} // namespace |
| 2044 |
|
2044 |
|
| 2045 |
/// NarrowVector - Given a value in the V128 register class, produce the |
2045 |
/// NarrowVector - Given a value in the V128 register class, produce the |
| 2046 |
/// equivalent value in the V64 register class. |
2046 |
/// equivalent value in the V64 register class. |
| 2047 |
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { |
2047 |
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { |
| 2048 |
EVT VT = V128Reg.getValueType(); |
2048 |
EVT VT = V128Reg.getValueType(); |
| 2049 |
unsigned WideSize = VT.getVectorNumElements(); |
2049 |
unsigned WideSize = VT.getVectorNumElements(); |
| 2050 |
MVT EltTy = VT.getVectorElementType().getSimpleVT(); |
2050 |
MVT EltTy = VT.getVectorElementType().getSimpleVT(); |
| 2051 |
MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); |
2051 |
MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); |
| 2052 |
|
2052 |
|
| 2053 |
return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, |
2053 |
return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, |
| 2054 |
V128Reg); |
2054 |
V128Reg); |
| 2055 |
} |
2055 |
} |
| 2056 |
|
2056 |
|
| 2057 |
void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, |
2057 |
void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, |
| 2058 |
unsigned Opc) { |
2058 |
unsigned Opc) { |
| 2059 |
SDLoc dl(N); |
2059 |
SDLoc dl(N); |
| 2060 |
EVT VT = N->getValueType(0); |
2060 |
EVT VT = N->getValueType(0); |
| 2061 |
bool Narrow = VT.getSizeInBits() == 64; |
2061 |
bool Narrow = VT.getSizeInBits() == 64; |
| 2062 |
|
2062 |
|
| 2063 |
// Form a REG_SEQUENCE to force register allocation. |
2063 |
// Form a REG_SEQUENCE to force register allocation. |
| 2064 |
SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
2064 |
SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
| 2065 |
|
2065 |
|
| 2066 |
if (Narrow) |
2066 |
if (Narrow) |
| 2067 |
transform(Regs, Regs.begin(), |
2067 |
transform(Regs, Regs.begin(), |
| 2068 |
WidenVector(*CurDAG)); |
2068 |
WidenVector(*CurDAG)); |
| 2069 |
|
2069 |
|
| 2070 |
SDValue RegSeq = createQTuple(Regs); |
2070 |
SDValue RegSeq = createQTuple(Regs); |
| 2071 |
|
2071 |
|
| 2072 |
const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
2072 |
const EVT ResTys[] = {MVT::Untyped, MVT::Other}; |
| 2073 |
|
2073 |
|
| 2074 |
unsigned LaneNo = |
2074 |
unsigned LaneNo = |
| 2075 |
cast(N->getOperand(NumVecs + 2))->getZExtValue(); |
2075 |
cast(N->getOperand(NumVecs + 2))->getZExtValue(); |
| 2076 |
|
2076 |
|
| 2077 |
SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
2077 |
SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
| 2078 |
N->getOperand(NumVecs + 3), N->getOperand(0)}; |
2078 |
N->getOperand(NumVecs + 3), N->getOperand(0)}; |
| 2079 |
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
2079 |
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| 2080 |
SDValue SuperReg = SDValue(Ld, 0); |
2080 |
SDValue SuperReg = SDValue(Ld, 0); |
| 2081 |
|
2081 |
|
| 2082 |
EVT WideVT = RegSeq.getOperand(1)->getValueType(0); |
2082 |
EVT WideVT = RegSeq.getOperand(1)->getValueType(0); |
| 2083 |
static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, |
2083 |
static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, |
| 2084 |
AArch64::qsub2, AArch64::qsub3 }; |
2084 |
AArch64::qsub2, AArch64::qsub3 }; |
| 2085 |
for (unsigned i = 0; i < NumVecs; ++i) { |
2085 |
for (unsigned i = 0; i < NumVecs; ++i) { |
| 2086 |
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); |
2086 |
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); |
| 2087 |
if (Narrow) |
2087 |
if (Narrow) |
| 2088 |
NV = NarrowVector(NV, *CurDAG); |
2088 |
NV = NarrowVector(NV, *CurDAG); |
| 2089 |
ReplaceUses(SDValue(N, i), NV); |
2089 |
ReplaceUses(SDValue(N, i), NV); |
| 2090 |
} |
2090 |
} |
| 2091 |
|
2091 |
|
| 2092 |
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); |
2092 |
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); |
| 2093 |
CurDAG->RemoveDeadNode(N); |
2093 |
CurDAG->RemoveDeadNode(N); |
| 2094 |
} |
2094 |
} |
| 2095 |
|
2095 |
|
| 2096 |
void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, |
2096 |
void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, |
| 2097 |
unsigned Opc) { |
2097 |
unsigned Opc) { |
| 2098 |
SDLoc dl(N); |
2098 |
SDLoc dl(N); |
| 2099 |
EVT VT = N->getValueType(0); |
2099 |
EVT VT = N->getValueType(0); |
| 2100 |
bool Narrow = VT.getSizeInBits() == 64; |
2100 |
bool Narrow = VT.getSizeInBits() == 64; |
| 2101 |
|
2101 |
|
| 2102 |
// Form a REG_SEQUENCE to force register allocation. |
2102 |
// Form a REG_SEQUENCE to force register allocation. |
| 2103 |
SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
2103 |
SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
| 2104 |
|
2104 |
|
| 2105 |
if (Narrow) |
2105 |
if (Narrow) |
| 2106 |
transform(Regs, Regs.begin(), |
2106 |
transform(Regs, Regs.begin(), |
| 2107 |
WidenVector(*CurDAG)); |
2107 |
WidenVector(*CurDAG)); |
| 2108 |
|
2108 |
|
| 2109 |
SDValue RegSeq = createQTuple(Regs); |
2109 |
SDValue RegSeq = createQTuple(Regs); |
| 2110 |
|
2110 |
|
| 2111 |
const EVT ResTys[] = {MVT::i64, // Type of the write back register |
2111 |
const EVT ResTys[] = {MVT::i64, // Type of the write back register |
| 2112 |
RegSeq->getValueType(0), MVT::Other}; |
2112 |
RegSeq->getValueType(0), MVT::Other}; |
| 2113 |
|
2113 |
|
| 2114 |
unsigned LaneNo = |
2114 |
unsigned LaneNo = |
| 2115 |
cast(N->getOperand(NumVecs + 1))->getZExtValue(); |
2115 |
cast(N->getOperand(NumVecs + 1))->getZExtValue(); |
| 2116 |
|
2116 |
|
| 2117 |
SDValue Ops[] = {RegSeq, |
2117 |
SDValue Ops[] = {RegSeq, |
| 2118 |
CurDAG->getTargetConstant(LaneNo, dl, |
2118 |
CurDAG->getTargetConstant(LaneNo, dl, |
| 2119 |
MVT::i64), // Lane Number |
2119 |
MVT::i64), // Lane Number |
| 2120 |
N->getOperand(NumVecs + 2), // Base register |
2120 |
N->getOperand(NumVecs + 2), // Base register |
| 2121 |
N->getOperand(NumVecs + 3), // Incremental |
2121 |
N->getOperand(NumVecs + 3), // Incremental |
| 2122 |
N->getOperand(0)}; |
2122 |
N->getOperand(0)}; |
| 2123 |
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
2123 |
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| 2124 |
|
2124 |
|
| 2125 |
// Update uses of the write back register |
2125 |
// Update uses of the write back register |
| 2126 |
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); |
2126 |
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); |
| 2127 |
|
2127 |
|
| 2128 |
// Update uses of the vector list |
2128 |
// Update uses of the vector list |
| 2129 |
SDValue SuperReg = SDValue(Ld, 1); |
2129 |
SDValue SuperReg = SDValue(Ld, 1); |
| 2130 |
if (NumVecs == 1) { |
2130 |
if (NumVecs == 1) { |
| 2131 |
ReplaceUses(SDValue(N, 0), |
2131 |
ReplaceUses(SDValue(N, 0), |
| 2132 |
Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); |
2132 |
Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); |
| 2133 |
} else { |
2133 |
} else { |
| 2134 |
EVT WideVT = RegSeq.getOperand(1)->getValueType(0); |
2134 |
EVT WideVT = RegSeq.getOperand(1)->getValueType(0); |
| 2135 |
static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, |
2135 |
static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, |
| 2136 |
AArch64::qsub2, AArch64::qsub3 }; |
2136 |
AArch64::qsub2, AArch64::qsub3 }; |
| 2137 |
for (unsigned i = 0; i < NumVecs; ++i) { |
2137 |
for (unsigned i = 0; i < NumVecs; ++i) { |
| 2138 |
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, |
2138 |
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, |
| 2139 |
SuperReg); |
2139 |
SuperReg); |
| 2140 |
if (Narrow) |
2140 |
if (Narrow) |
| 2141 |
NV = NarrowVector(NV, *CurDAG); |
2141 |
NV = NarrowVector(NV, *CurDAG); |
| 2142 |
ReplaceUses(SDValue(N, i), NV); |
2142 |
ReplaceUses(SDValue(N, i), NV); |
| 2143 |
} |
2143 |
} |
| 2144 |
} |
2144 |
} |
| 2145 |
|
2145 |
|
| 2146 |
// Update the Chain |
2146 |
// Update the Chain |
| 2147 |
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); |
2147 |
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); |
| 2148 |
CurDAG->RemoveDeadNode(N); |
2148 |
CurDAG->RemoveDeadNode(N); |
| 2149 |
} |
2149 |
} |
| 2150 |
|
2150 |
|
| 2151 |
void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, |
2151 |
void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, |
| 2152 |
unsigned Opc) { |
2152 |
unsigned Opc) { |
| 2153 |
SDLoc dl(N); |
2153 |
SDLoc dl(N); |
| 2154 |
EVT VT = N->getOperand(2)->getValueType(0); |
2154 |
EVT VT = N->getOperand(2)->getValueType(0); |
| 2155 |
bool Narrow = VT.getSizeInBits() == 64; |
2155 |
bool Narrow = VT.getSizeInBits() == 64; |
| 2156 |
|
2156 |
|
| 2157 |
// Form a REG_SEQUENCE to force register allocation. |
2157 |
// Form a REG_SEQUENCE to force register allocation. |
| 2158 |
SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
2158 |
SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); |
| 2159 |
|
2159 |
|
| 2160 |
if (Narrow) |
2160 |
if (Narrow) |
| 2161 |
transform(Regs, Regs.begin(), |
2161 |
transform(Regs, Regs.begin(), |
| 2162 |
WidenVector(*CurDAG)); |
2162 |
WidenVector(*CurDAG)); |
| 2163 |
|
2163 |
|
| 2164 |
SDValue RegSeq = createQTuple(Regs); |
2164 |
SDValue RegSeq = createQTuple(Regs); |
| 2165 |
|
2165 |
|
| 2166 |
unsigned LaneNo = |
2166 |
unsigned LaneNo = |
| 2167 |
cast(N->getOperand(NumVecs + 2))->getZExtValue(); |
2167 |
cast(N->getOperand(NumVecs + 2))->getZExtValue(); |
| 2168 |
|
2168 |
|
| 2169 |
SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
2169 |
SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
| 2170 |
N->getOperand(NumVecs + 3), N->getOperand(0)}; |
2170 |
N->getOperand(NumVecs + 3), N->getOperand(0)}; |
| 2171 |
SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
2171 |
SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
| 2172 |
|
2172 |
|
| 2173 |
// Transfer memoperands. |
2173 |
// Transfer memoperands. |
| 2174 |
MachineMemOperand *MemOp = cast(N)->getMemOperand(); |
2174 |
MachineMemOperand *MemOp = cast(N)->getMemOperand(); |
| 2175 |
CurDAG->setNodeMemRefs(cast(St), {MemOp}); |
2175 |
CurDAG->setNodeMemRefs(cast(St), {MemOp}); |
| 2176 |
|
2176 |
|
| 2177 |
ReplaceNode(N, St); |
2177 |
ReplaceNode(N, St); |
| 2178 |
} |
2178 |
} |
| 2179 |
|
2179 |
|
| 2180 |
void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, |
2180 |
void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, |
| 2181 |
unsigned Opc) { |
2181 |
unsigned Opc) { |
| 2182 |
SDLoc dl(N); |
2182 |
SDLoc dl(N); |
| 2183 |
EVT VT = N->getOperand(2)->getValueType(0); |
2183 |
EVT VT = N->getOperand(2)->getValueType(0); |
| 2184 |
bool Narrow = VT.getSizeInBits() == 64; |
2184 |
bool Narrow = VT.getSizeInBits() == 64; |
| 2185 |
|
2185 |
|
| 2186 |
// Form a REG_SEQUENCE to force register allocation. |
2186 |
// Form a REG_SEQUENCE to force register allocation. |
| 2187 |
SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
2187 |
SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); |
| 2188 |
|
2188 |
|
| 2189 |
if (Narrow) |
2189 |
if (Narrow) |
| 2190 |
transform(Regs, Regs.begin(), |
2190 |
transform(Regs, Regs.begin(), |
| 2191 |
WidenVector(*CurDAG)); |
2191 |
WidenVector(*CurDAG)); |
| 2192 |
|
2192 |
|
| 2193 |
SDValue RegSeq = createQTuple(Regs); |
2193 |
SDValue RegSeq = createQTuple(Regs); |
| 2194 |
|
2194 |
|
| 2195 |
const EVT ResTys[] = {MVT::i64, // Type of the write back register |
2195 |
const EVT ResTys[] = {MVT::i64, // Type of the write back register |
| 2196 |
MVT::Other}; |
2196 |
MVT::Other}; |
| 2197 |
|
2197 |
|
| 2198 |
unsigned LaneNo = |
2198 |
unsigned LaneNo = |
| 2199 |
cast(N->getOperand(NumVecs + 1))->getZExtValue(); |
2199 |
cast(N->getOperand(NumVecs + 1))->getZExtValue(); |
| 2200 |
|
2200 |
|
| 2201 |
SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
2201 |
SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), |
| 2202 |
N->getOperand(NumVecs + 2), // Base Register |
2202 |
N->getOperand(NumVecs + 2), // Base Register |
| 2203 |
N->getOperand(NumVecs + 3), // Incremental |
2203 |
N->getOperand(NumVecs + 3), // Incremental |
| 2204 |
N->getOperand(0)}; |
2204 |
N->getOperand(0)}; |
| 2205 |
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
2205 |
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); |
| 2206 |
|
2206 |
|
| 2207 |
// Transfer memoperands. |
2207 |
// Transfer memoperands. |
| 2208 |
MachineMemOperand *MemOp = cast(N)->getMemOperand(); |
2208 |
MachineMemOperand *MemOp = cast(N)->getMemOperand(); |
| 2209 |
CurDAG->setNodeMemRefs(cast(St), {MemOp}); |
2209 |
CurDAG->setNodeMemRefs(cast(St), {MemOp}); |
| 2210 |
|
2210 |
|
| 2211 |
ReplaceNode(N, St); |
2211 |
ReplaceNode(N, St); |
| 2212 |
} |
2212 |
} |
| 2213 |
|
2213 |
|
| 2214 |
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, |
2214 |
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, |
| 2215 |
unsigned &Opc, SDValue &Opd0, |
2215 |
unsigned &Opc, SDValue &Opd0, |
| 2216 |
unsigned &LSB, unsigned &MSB, |
2216 |
unsigned &LSB, unsigned &MSB, |
| 2217 |
unsigned NumberOfIgnoredLowBits, |
2217 |
unsigned NumberOfIgnoredLowBits, |
| 2218 |
bool BiggerPattern) { |
2218 |
bool BiggerPattern) { |
| 2219 |
assert(N->getOpcode() == ISD::AND && |
2219 |
assert(N->getOpcode() == ISD::AND && |
| 2220 |
"N must be a AND operation to call this function"); |
2220 |
"N must be a AND operation to call this function"); |
| 2221 |
|
2221 |
|
| 2222 |
EVT VT = N->getValueType(0); |
2222 |
EVT VT = N->getValueType(0); |
| 2223 |
|
2223 |
|
| 2224 |
// Here we can test the type of VT and return false when the type does not |
2224 |
// Here we can test the type of VT and return false when the type does not |
| 2225 |
// match, but since it is done prior to that call in the current context |
2225 |
// match, but since it is done prior to that call in the current context |
| 2226 |
// we turned that into an assert to avoid redundant code. |
2226 |
// we turned that into an assert to avoid redundant code. |
| 2227 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
2227 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
| 2228 |
"Type checking must have been done before calling this function"); |
2228 |
"Type checking must have been done before calling this function"); |
| 2229 |
|
2229 |
|
| 2230 |
// FIXME: simplify-demanded-bits in DAGCombine will probably have |
2230 |
// FIXME: simplify-demanded-bits in DAGCombine will probably have |
| 2231 |
// changed the AND node to a 32-bit mask operation. We'll have to |
2231 |
// changed the AND node to a 32-bit mask operation. We'll have to |
| 2232 |
// undo that as part of the transform here if we want to catch all |
2232 |
// undo that as part of the transform here if we want to catch all |
| 2233 |
// the opportunities. |
2233 |
// the opportunities. |
| 2234 |
// Currently the NumberOfIgnoredLowBits argument helps to recover |
2234 |
// Currently the NumberOfIgnoredLowBits argument helps to recover |
| 2235 |
// from these situations when matching bigger pattern (bitfield insert). |
2235 |
// from these situations when matching bigger pattern (bitfield insert). |
| 2236 |
|
2236 |
|
| 2237 |
// For unsigned extracts, check for a shift right and mask |
2237 |
// For unsigned extracts, check for a shift right and mask |
| 2238 |
uint64_t AndImm = 0; |
2238 |
uint64_t AndImm = 0; |
| 2239 |
if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) |
2239 |
if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) |
| 2240 |
return false; |
2240 |
return false; |
| 2241 |
|
2241 |
|
| 2242 |
const SDNode *Op0 = N->getOperand(0).getNode(); |
2242 |
const SDNode *Op0 = N->getOperand(0).getNode(); |
| 2243 |
|
2243 |
|
| 2244 |
// Because of simplify-demanded-bits in DAGCombine, the mask may have been |
2244 |
// Because of simplify-demanded-bits in DAGCombine, the mask may have been |
| 2245 |
// simplified. Try to undo that |
2245 |
// simplified. Try to undo that |
| 2246 |
AndImm |= maskTrailingOnes(NumberOfIgnoredLowBits); |
2246 |
AndImm |= maskTrailingOnes(NumberOfIgnoredLowBits); |
| 2247 |
|
2247 |
|
| 2248 |
// The immediate is a mask of the low bits iff imm & (imm+1) == 0 |
2248 |
// The immediate is a mask of the low bits iff imm & (imm+1) == 0 |
| 2249 |
if (AndImm & (AndImm + 1)) |
2249 |
if (AndImm & (AndImm + 1)) |
| 2250 |
return false; |
2250 |
return false; |
| 2251 |
|
2251 |
|
| 2252 |
bool ClampMSB = false; |
2252 |
bool ClampMSB = false; |
| 2253 |
uint64_t SrlImm = 0; |
2253 |
uint64_t SrlImm = 0; |
| 2254 |
// Handle the SRL + ANY_EXTEND case. |
2254 |
// Handle the SRL + ANY_EXTEND case. |
| 2255 |
if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && |
2255 |
if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && |
| 2256 |
isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { |
2256 |
isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { |
| 2257 |
// Extend the incoming operand of the SRL to 64-bit. |
2257 |
// Extend the incoming operand of the SRL to 64-bit. |
| 2258 |
Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); |
2258 |
Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); |
| 2259 |
// Make sure to clamp the MSB so that we preserve the semantics of the |
2259 |
// Make sure to clamp the MSB so that we preserve the semantics of the |
| 2260 |
// original operations. |
2260 |
// original operations. |
| 2261 |
ClampMSB = true; |
2261 |
ClampMSB = true; |
| 2262 |
} else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && |
2262 |
} else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && |
| 2263 |
isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, |
2263 |
isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, |
| 2264 |
SrlImm)) { |
2264 |
SrlImm)) { |
| 2265 |
// If the shift result was truncated, we can still combine them. |
2265 |
// If the shift result was truncated, we can still combine them. |
| 2266 |
Opd0 = Op0->getOperand(0).getOperand(0); |
2266 |
Opd0 = Op0->getOperand(0).getOperand(0); |
| 2267 |
|
2267 |
|
| 2268 |
// Use the type of SRL node. |
2268 |
// Use the type of SRL node. |
| 2269 |
VT = Opd0->getValueType(0); |
2269 |
VT = Opd0->getValueType(0); |
| 2270 |
} else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { |
2270 |
} else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { |
| 2271 |
Opd0 = Op0->getOperand(0); |
2271 |
Opd0 = Op0->getOperand(0); |
| 2272 |
ClampMSB = (VT == MVT::i32); |
2272 |
ClampMSB = (VT == MVT::i32); |
| 2273 |
} else if (BiggerPattern) { |
2273 |
} else if (BiggerPattern) { |
| 2274 |
// Let's pretend a 0 shift right has been performed. |
2274 |
// Let's pretend a 0 shift right has been performed. |
| 2275 |
// The resulting code will be at least as good as the original one |
2275 |
// The resulting code will be at least as good as the original one |
| 2276 |
// plus it may expose more opportunities for bitfield insert pattern. |
2276 |
// plus it may expose more opportunities for bitfield insert pattern. |
| 2277 |
// FIXME: Currently we limit this to the bigger pattern, because |
2277 |
// FIXME: Currently we limit this to the bigger pattern, because |
| 2278 |
// some optimizations expect AND and not UBFM. |
2278 |
// some optimizations expect AND and not UBFM. |
| 2279 |
Opd0 = N->getOperand(0); |
2279 |
Opd0 = N->getOperand(0); |
| 2280 |
} else |
2280 |
} else |
| 2281 |
return false; |
2281 |
return false; |
| 2282 |
|
2282 |
|
| 2283 |
// Bail out on large immediates. This happens when no proper |
2283 |
// Bail out on large immediates. This happens when no proper |
| 2284 |
// combining/constant folding was performed. |
2284 |
// combining/constant folding was performed. |
| 2285 |
if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { |
2285 |
if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { |
| 2286 |
LLVM_DEBUG( |
2286 |
LLVM_DEBUG( |
| 2287 |
(dbgs() << N |
2287 |
(dbgs() << N |
| 2288 |
<< ": Found large shift immediate, this should not happen\n")); |
2288 |
<< ": Found large shift immediate, this should not happen\n")); |
| 2289 |
return false; |
2289 |
return false; |
| 2290 |
} |
2290 |
} |
| 2291 |
|
2291 |
|
| 2292 |
LSB = SrlImm; |
2292 |
LSB = SrlImm; |
| 2293 |
MSB = SrlImm + |
2293 |
MSB = SrlImm + |
| 2294 |
(VT == MVT::i32 ? llvm::countr_one(AndImm) |
2294 |
(VT == MVT::i32 ? llvm::countr_one(AndImm) |
| 2295 |
: llvm::countr_one(AndImm)) - |
2295 |
: llvm::countr_one(AndImm)) - |
| 2296 |
1; |
2296 |
1; |
| 2297 |
if (ClampMSB) |
2297 |
if (ClampMSB) |
| 2298 |
// Since we're moving the extend before the right shift operation, we need |
2298 |
// Since we're moving the extend before the right shift operation, we need |
| 2299 |
// to clamp the MSB to make sure we don't shift in undefined bits instead of |
2299 |
// to clamp the MSB to make sure we don't shift in undefined bits instead of |
| 2300 |
// the zeros which would get shifted in with the original right shift |
2300 |
// the zeros which would get shifted in with the original right shift |
| 2301 |
// operation. |
2301 |
// operation. |
| 2302 |
MSB = MSB > 31 ? 31 : MSB; |
2302 |
MSB = MSB > 31 ? 31 : MSB; |
| 2303 |
|
2303 |
|
| 2304 |
Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; |
2304 |
Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; |
| 2305 |
return true; |
2305 |
return true; |
| 2306 |
} |
2306 |
} |
| 2307 |
|
2307 |
|
| 2308 |
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, |
2308 |
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, |
| 2309 |
SDValue &Opd0, unsigned &Immr, |
2309 |
SDValue &Opd0, unsigned &Immr, |
| 2310 |
unsigned &Imms) { |
2310 |
unsigned &Imms) { |
| 2311 |
assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); |
2311 |
assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); |
| 2312 |
|
2312 |
|
| 2313 |
EVT VT = N->getValueType(0); |
2313 |
EVT VT = N->getValueType(0); |
| 2314 |
unsigned BitWidth = VT.getSizeInBits(); |
2314 |
unsigned BitWidth = VT.getSizeInBits(); |
| 2315 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
2315 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
| 2316 |
"Type checking must have been done before calling this function"); |
2316 |
"Type checking must have been done before calling this function"); |
| 2317 |
|
2317 |
|
| 2318 |
SDValue Op = N->getOperand(0); |
2318 |
SDValue Op = N->getOperand(0); |
| 2319 |
if (Op->getOpcode() == ISD::TRUNCATE) { |
2319 |
if (Op->getOpcode() == ISD::TRUNCATE) { |
| 2320 |
Op = Op->getOperand(0); |
2320 |
Op = Op->getOperand(0); |
| 2321 |
VT = Op->getValueType(0); |
2321 |
VT = Op->getValueType(0); |
| 2322 |
BitWidth = VT.getSizeInBits(); |
2322 |
BitWidth = VT.getSizeInBits(); |
| 2323 |
} |
2323 |
} |
| 2324 |
|
2324 |
|
| 2325 |
uint64_t ShiftImm; |
2325 |
uint64_t ShiftImm; |
| 2326 |
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && |
2326 |
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && |
| 2327 |
!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) |
2327 |
!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) |
| 2328 |
return false; |
2328 |
return false; |
| 2329 |
|
2329 |
|
| 2330 |
unsigned Width = cast(N->getOperand(1))->getVT().getSizeInBits(); |
2330 |
unsigned Width = cast(N->getOperand(1))->getVT().getSizeInBits(); |
| 2331 |
if (ShiftImm + Width > BitWidth) |
2331 |
if (ShiftImm + Width > BitWidth) |
| 2332 |
return false; |
2332 |
return false; |
| 2333 |
|
2333 |
|
| 2334 |
Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri; |
2334 |
Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri; |
| 2335 |
Opd0 = Op.getOperand(0); |
2335 |
Opd0 = Op.getOperand(0); |
| 2336 |
Immr = ShiftImm; |
2336 |
Immr = ShiftImm; |
| 2337 |
Imms = ShiftImm + Width - 1; |
2337 |
Imms = ShiftImm + Width - 1; |
| 2338 |
return true; |
2338 |
return true; |
| 2339 |
} |
2339 |
} |
| 2340 |
|
2340 |
|
| 2341 |
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, |
2341 |
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, |
| 2342 |
SDValue &Opd0, unsigned &LSB, |
2342 |
SDValue &Opd0, unsigned &LSB, |
| 2343 |
unsigned &MSB) { |
2343 |
unsigned &MSB) { |
| 2344 |
// We are looking for the following pattern which basically extracts several |
2344 |
// We are looking for the following pattern which basically extracts several |
| 2345 |
// continuous bits from the source value and places it from the LSB of the |
2345 |
// continuous bits from the source value and places it from the LSB of the |
| 2346 |
// destination value, all other bits of the destination value or set to zero: |
2346 |
// destination value, all other bits of the destination value or set to zero: |
| 2347 |
// |
2347 |
// |
| 2348 |
// Value2 = AND Value, MaskImm |
2348 |
// Value2 = AND Value, MaskImm |
| 2349 |
// SRL Value2, ShiftImm |
2349 |
// SRL Value2, ShiftImm |
| 2350 |
// |
2350 |
// |
| 2351 |
// with MaskImm >> ShiftImm to search for the bit width. |
2351 |
// with MaskImm >> ShiftImm to search for the bit width. |
| 2352 |
// |
2352 |
// |
| 2353 |
// This gets selected into a single UBFM: |
2353 |
// This gets selected into a single UBFM: |
| 2354 |
// |
2354 |
// |
| 2355 |
// UBFM Value, ShiftImm, Log2_64(MaskImm) |
2355 |
// UBFM Value, ShiftImm, Log2_64(MaskImm) |
| 2356 |
// |
2356 |
// |
| 2357 |
|
2357 |
|
| 2358 |
if (N->getOpcode() != ISD::SRL) |
2358 |
if (N->getOpcode() != ISD::SRL) |
| 2359 |
return false; |
2359 |
return false; |
| 2360 |
|
2360 |
|
| 2361 |
uint64_t AndMask = 0; |
2361 |
uint64_t AndMask = 0; |
| 2362 |
if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) |
2362 |
if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) |
| 2363 |
return false; |
2363 |
return false; |
| 2364 |
|
2364 |
|
| 2365 |
Opd0 = N->getOperand(0).getOperand(0); |
2365 |
Opd0 = N->getOperand(0).getOperand(0); |
| 2366 |
|
2366 |
|
| 2367 |
uint64_t SrlImm = 0; |
2367 |
uint64_t SrlImm = 0; |
| 2368 |
if (!isIntImmediate(N->getOperand(1), SrlImm)) |
2368 |
if (!isIntImmediate(N->getOperand(1), SrlImm)) |
| 2369 |
return false; |
2369 |
return false; |
| 2370 |
|
2370 |
|
| 2371 |
// Check whether we really have several bits extract here. |
2371 |
// Check whether we really have several bits extract here. |
| 2372 |
if (!isMask_64(AndMask >> SrlImm)) |
2372 |
if (!isMask_64(AndMask >> SrlImm)) |
| 2373 |
return false; |
2373 |
return false; |
| 2374 |
|
2374 |
|
| 2375 |
Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; |
2375 |
Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; |
| 2376 |
LSB = SrlImm; |
2376 |
LSB = SrlImm; |
| 2377 |
MSB = llvm::Log2_64(AndMask); |
2377 |
MSB = llvm::Log2_64(AndMask); |
| 2378 |
return true; |
2378 |
return true; |
| 2379 |
} |
2379 |
} |
| 2380 |
|
2380 |
|
| 2381 |
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, |
2381 |
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, |
| 2382 |
unsigned &Immr, unsigned &Imms, |
2382 |
unsigned &Immr, unsigned &Imms, |
| 2383 |
bool BiggerPattern) { |
2383 |
bool BiggerPattern) { |
| 2384 |
assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && |
2384 |
assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && |
| 2385 |
"N must be a SHR/SRA operation to call this function"); |
2385 |
"N must be a SHR/SRA operation to call this function"); |
| 2386 |
|
2386 |
|
| 2387 |
EVT VT = N->getValueType(0); |
2387 |
EVT VT = N->getValueType(0); |
| 2388 |
|
2388 |
|
| 2389 |
// Here we can test the type of VT and return false when the type does not |
2389 |
// Here we can test the type of VT and return false when the type does not |
| 2390 |
// match, but since it is done prior to that call in the current context |
2390 |
// match, but since it is done prior to that call in the current context |
| 2391 |
// we turned that into an assert to avoid redundant code. |
2391 |
// we turned that into an assert to avoid redundant code. |
| 2392 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
2392 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
| 2393 |
"Type checking must have been done before calling this function"); |
2393 |
"Type checking must have been done before calling this function"); |
| 2394 |
|
2394 |
|
| 2395 |
// Check for AND + SRL doing several bits extract. |
2395 |
// Check for AND + SRL doing several bits extract. |
| 2396 |
if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) |
2396 |
if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) |
| 2397 |
return true; |
2397 |
return true; |
| 2398 |
|
2398 |
|
| 2399 |
// We're looking for a shift of a shift. |
2399 |
// We're looking for a shift of a shift. |
| 2400 |
uint64_t ShlImm = 0; |
2400 |
uint64_t ShlImm = 0; |
| 2401 |
uint64_t TruncBits = 0; |
2401 |
uint64_t TruncBits = 0; |
| 2402 |
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { |
2402 |
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { |
| 2403 |
Opd0 = N->getOperand(0).getOperand(0); |
2403 |
Opd0 = N->getOperand(0).getOperand(0); |
| 2404 |
} else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && |
2404 |
} else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && |
| 2405 |
N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { |
2405 |
N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { |
| 2406 |
// We are looking for a shift of truncate. Truncate from i64 to i32 could |
2406 |
// We are looking for a shift of truncate. Truncate from i64 to i32 could |
| 2407 |
// be considered as setting high 32 bits as zero. Our strategy here is to |
2407 |
// be considered as setting high 32 bits as zero. Our strategy here is to |
| 2408 |
// always generate 64bit UBFM. This consistency will help the CSE pass |
2408 |
// always generate 64bit UBFM. This consistency will help the CSE pass |
| 2409 |
// later find more redundancy. |
2409 |
// later find more redundancy. |
| 2410 |
Opd0 = N->getOperand(0).getOperand(0); |
2410 |
Opd0 = N->getOperand(0).getOperand(0); |
| 2411 |
TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); |
2411 |
TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); |
| 2412 |
VT = Opd0.getValueType(); |
2412 |
VT = Opd0.getValueType(); |
| 2413 |
assert(VT == MVT::i64 && "the promoted type should be i64"); |
2413 |
assert(VT == MVT::i64 && "the promoted type should be i64"); |
| 2414 |
} else if (BiggerPattern) { |
2414 |
} else if (BiggerPattern) { |
| 2415 |
// Let's pretend a 0 shift left has been performed. |
2415 |
// Let's pretend a 0 shift left has been performed. |
| 2416 |
// FIXME: Currently we limit this to the bigger pattern case, |
2416 |
// FIXME: Currently we limit this to the bigger pattern case, |
| 2417 |
// because some optimizations expect AND and not UBFM |
2417 |
// because some optimizations expect AND and not UBFM |
| 2418 |
Opd0 = N->getOperand(0); |
2418 |
Opd0 = N->getOperand(0); |
| 2419 |
} else |
2419 |
} else |
| 2420 |
return false; |
2420 |
return false; |
| 2421 |
|
2421 |
|
| 2422 |
// Missing combines/constant folding may have left us with strange |
2422 |
// Missing combines/constant folding may have left us with strange |
| 2423 |
// constants. |
2423 |
// constants. |
| 2424 |
if (ShlImm >= VT.getSizeInBits()) { |
2424 |
if (ShlImm >= VT.getSizeInBits()) { |
| 2425 |
LLVM_DEBUG( |
2425 |
LLVM_DEBUG( |
| 2426 |
(dbgs() << N |
2426 |
(dbgs() << N |
| 2427 |
<< ": Found large shift immediate, this should not happen\n")); |
2427 |
<< ": Found large shift immediate, this should not happen\n")); |
| 2428 |
return false; |
2428 |
return false; |
| 2429 |
} |
2429 |
} |
| 2430 |
|
2430 |
|
| 2431 |
uint64_t SrlImm = 0; |
2431 |
uint64_t SrlImm = 0; |
| 2432 |
if (!isIntImmediate(N->getOperand(1), SrlImm)) |
2432 |
if (!isIntImmediate(N->getOperand(1), SrlImm)) |
| 2433 |
return false; |
2433 |
return false; |
| 2434 |
|
2434 |
|
| 2435 |
assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && |
2435 |
assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && |
| 2436 |
"bad amount in shift node!"); |
2436 |
"bad amount in shift node!"); |
| 2437 |
int immr = SrlImm - ShlImm; |
2437 |
int immr = SrlImm - ShlImm; |
| 2438 |
Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; |
2438 |
Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; |
| 2439 |
Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; |
2439 |
Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; |
| 2440 |
// SRA requires a signed extraction |
2440 |
// SRA requires a signed extraction |
| 2441 |
if (VT == MVT::i32) |
2441 |
if (VT == MVT::i32) |
| 2442 |
Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; |
2442 |
Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; |
| 2443 |
else |
2443 |
else |
| 2444 |
Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; |
2444 |
Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; |
| 2445 |
return true; |
2445 |
return true; |
| 2446 |
} |
2446 |
} |
| 2447 |
|
2447 |
|
| 2448 |
bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { |
2448 |
bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { |
| 2449 |
assert(N->getOpcode() == ISD::SIGN_EXTEND); |
2449 |
assert(N->getOpcode() == ISD::SIGN_EXTEND); |
| 2450 |
|
2450 |
|
| 2451 |
EVT VT = N->getValueType(0); |
2451 |
EVT VT = N->getValueType(0); |
| 2452 |
EVT NarrowVT = N->getOperand(0)->getValueType(0); |
2452 |
EVT NarrowVT = N->getOperand(0)->getValueType(0); |
| 2453 |
if (VT != MVT::i64 || NarrowVT != MVT::i32) |
2453 |
if (VT != MVT::i64 || NarrowVT != MVT::i32) |
| 2454 |
return false; |
2454 |
return false; |
| 2455 |
|
2455 |
|
| 2456 |
uint64_t ShiftImm; |
2456 |
uint64_t ShiftImm; |
| 2457 |
SDValue Op = N->getOperand(0); |
2457 |
SDValue Op = N->getOperand(0); |
| 2458 |
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) |
2458 |
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) |
| 2459 |
return false; |
2459 |
return false; |
| 2460 |
|
2460 |
|
| 2461 |
SDLoc dl(N); |
2461 |
SDLoc dl(N); |
| 2462 |
// Extend the incoming operand of the shift to 64-bits. |
2462 |
// Extend the incoming operand of the shift to 64-bits. |
| 2463 |
SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); |
2463 |
SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); |
| 2464 |
unsigned Immr = ShiftImm; |
2464 |
unsigned Immr = ShiftImm; |
| 2465 |
unsigned Imms = NarrowVT.getSizeInBits() - 1; |
2465 |
unsigned Imms = NarrowVT.getSizeInBits() - 1; |
| 2466 |
SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), |
2466 |
SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), |
| 2467 |
CurDAG->getTargetConstant(Imms, dl, VT)}; |
2467 |
CurDAG->getTargetConstant(Imms, dl, VT)}; |
| 2468 |
CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); |
2468 |
CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); |
| 2469 |
return true; |
2469 |
return true; |
| 2470 |
} |
2470 |
} |
| 2471 |
|
2471 |
|
| 2472 |
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, |
2472 |
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, |
| 2473 |
SDValue &Opd0, unsigned &Immr, unsigned &Imms, |
2473 |
SDValue &Opd0, unsigned &Immr, unsigned &Imms, |
| 2474 |
unsigned NumberOfIgnoredLowBits = 0, |
2474 |
unsigned NumberOfIgnoredLowBits = 0, |
| 2475 |
bool BiggerPattern = false) { |
2475 |
bool BiggerPattern = false) { |
| 2476 |
if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) |
2476 |
if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) |
| 2477 |
return false; |
2477 |
return false; |
| 2478 |
|
2478 |
|
| 2479 |
switch (N->getOpcode()) { |
2479 |
switch (N->getOpcode()) { |
| 2480 |
default: |
2480 |
default: |
| 2481 |
if (!N->isMachineOpcode()) |
2481 |
if (!N->isMachineOpcode()) |
| 2482 |
return false; |
2482 |
return false; |
| 2483 |
break; |
2483 |
break; |
| 2484 |
case ISD::AND: |
2484 |
case ISD::AND: |
| 2485 |
return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, |
2485 |
return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, |
| 2486 |
NumberOfIgnoredLowBits, BiggerPattern); |
2486 |
NumberOfIgnoredLowBits, BiggerPattern); |
| 2487 |
case ISD::SRL: |
2487 |
case ISD::SRL: |
| 2488 |
case ISD::SRA: |
2488 |
case ISD::SRA: |
| 2489 |
return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); |
2489 |
return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); |
| 2490 |
|
2490 |
|
| 2491 |
case ISD::SIGN_EXTEND_INREG: |
2491 |
case ISD::SIGN_EXTEND_INREG: |
| 2492 |
return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); |
2492 |
return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); |
| 2493 |
} |
2493 |
} |
| 2494 |
|
2494 |
|
| 2495 |
unsigned NOpc = N->getMachineOpcode(); |
2495 |
unsigned NOpc = N->getMachineOpcode(); |
| 2496 |
switch (NOpc) { |
2496 |
switch (NOpc) { |
| 2497 |
default: |
2497 |
default: |
| 2498 |
return false; |
2498 |
return false; |
| 2499 |
case AArch64::SBFMWri: |
2499 |
case AArch64::SBFMWri: |
| 2500 |
case AArch64::UBFMWri: |
2500 |
case AArch64::UBFMWri: |
| 2501 |
case AArch64::SBFMXri: |
2501 |
case AArch64::SBFMXri: |
| 2502 |
case AArch64::UBFMXri: |
2502 |
case AArch64::UBFMXri: |
| 2503 |
Opc = NOpc; |
2503 |
Opc = NOpc; |
| 2504 |
Opd0 = N->getOperand(0); |
2504 |
Opd0 = N->getOperand(0); |
| 2505 |
Immr = cast(N->getOperand(1).getNode())->getZExtValue(); |
2505 |
Immr = cast(N->getOperand(1).getNode())->getZExtValue(); |
| 2506 |
Imms = cast(N->getOperand(2).getNode())->getZExtValue(); |
2506 |
Imms = cast(N->getOperand(2).getNode())->getZExtValue(); |
| 2507 |
return true; |
2507 |
return true; |
| 2508 |
} |
2508 |
} |
| 2509 |
// Unreachable |
2509 |
// Unreachable |
| 2510 |
return false; |
2510 |
return false; |
| 2511 |
} |
2511 |
} |
| 2512 |
|
2512 |
|
| 2513 |
bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { |
2513 |
bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { |
| 2514 |
unsigned Opc, Immr, Imms; |
2514 |
unsigned Opc, Immr, Imms; |
| 2515 |
SDValue Opd0; |
2515 |
SDValue Opd0; |
| 2516 |
if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) |
2516 |
if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) |
| 2517 |
return false; |
2517 |
return false; |
| 2518 |
|
2518 |
|
| 2519 |
EVT VT = N->getValueType(0); |
2519 |
EVT VT = N->getValueType(0); |
| 2520 |
SDLoc dl(N); |
2520 |
SDLoc dl(N); |
| 2521 |
|
2521 |
|
| 2522 |
// If the bit extract operation is 64bit but the original type is 32bit, we |
2522 |
// If the bit extract operation is 64bit but the original type is 32bit, we |
| 2523 |
// need to add one EXTRACT_SUBREG. |
2523 |
// need to add one EXTRACT_SUBREG. |
| 2524 |
if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { |
2524 |
if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { |
| 2525 |
SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), |
2525 |
SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), |
| 2526 |
CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; |
2526 |
CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; |
| 2527 |
|
2527 |
|
| 2528 |
SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); |
2528 |
SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); |
| 2529 |
SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, |
2529 |
SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, |
| 2530 |
MVT::i32, SDValue(BFM, 0)); |
2530 |
MVT::i32, SDValue(BFM, 0)); |
| 2531 |
ReplaceNode(N, Inner.getNode()); |
2531 |
ReplaceNode(N, Inner.getNode()); |
| 2532 |
return true; |
2532 |
return true; |
| 2533 |
} |
2533 |
} |
| 2534 |
|
2534 |
|
| 2535 |
SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), |
2535 |
SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), |
| 2536 |
CurDAG->getTargetConstant(Imms, dl, VT)}; |
2536 |
CurDAG->getTargetConstant(Imms, dl, VT)}; |
| 2537 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
2537 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
| 2538 |
return true; |
2538 |
return true; |
| 2539 |
} |
2539 |
} |
| 2540 |
|
2540 |
|
| 2541 |
/// Does DstMask form a complementary pair with the mask provided by |
2541 |
/// Does DstMask form a complementary pair with the mask provided by |
| 2542 |
/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, |
2542 |
/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, |
| 2543 |
/// this asks whether DstMask zeroes precisely those bits that will be set by |
2543 |
/// this asks whether DstMask zeroes precisely those bits that will be set by |
| 2544 |
/// the other half. |
2544 |
/// the other half. |
| 2545 |
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, |
2545 |
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, |
| 2546 |
unsigned NumberOfIgnoredHighBits, EVT VT) { |
2546 |
unsigned NumberOfIgnoredHighBits, EVT VT) { |
| 2547 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
2547 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
| 2548 |
"i32 or i64 mask type expected!"); |
2548 |
"i32 or i64 mask type expected!"); |
| 2549 |
unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; |
2549 |
unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; |
| 2550 |
|
2550 |
|
| 2551 |
APInt SignificantDstMask = APInt(BitWidth, DstMask); |
2551 |
APInt SignificantDstMask = APInt(BitWidth, DstMask); |
| 2552 |
APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); |
2552 |
APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); |
| 2553 |
|
2553 |
|
| 2554 |
return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && |
2554 |
return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && |
| 2555 |
(SignificantDstMask | SignificantBitsToBeInserted).isAllOnes(); |
2555 |
(SignificantDstMask | SignificantBitsToBeInserted).isAllOnes(); |
| 2556 |
} |
2556 |
} |
| 2557 |
|
2557 |
|
| 2558 |
// Look for bits that will be useful for later uses. |
2558 |
// Look for bits that will be useful for later uses. |
| 2559 |
// A bit is consider useless as soon as it is dropped and never used |
2559 |
// A bit is consider useless as soon as it is dropped and never used |
| 2560 |
// before it as been dropped. |
2560 |
// before it as been dropped. |
| 2561 |
// E.g., looking for useful bit of x |
2561 |
// E.g., looking for useful bit of x |
| 2562 |
// 1. y = x & 0x7 |
2562 |
// 1. y = x & 0x7 |
| 2563 |
// 2. z = y >> 2 |
2563 |
// 2. z = y >> 2 |
| 2564 |
// After #1, x useful bits are 0x7, then the useful bits of x, live through |
2564 |
// After #1, x useful bits are 0x7, then the useful bits of x, live through |
| 2565 |
// y. |
2565 |
// y. |
| 2566 |
// After #2, the useful bits of x are 0x4. |
2566 |
// After #2, the useful bits of x are 0x4. |
| 2567 |
// However, if x is used on an unpredicatable instruction, then all its bits |
2567 |
// However, if x is used on an unpredicatable instruction, then all its bits |
| 2568 |
// are useful. |
2568 |
// are useful. |
| 2569 |
// E.g. |
2569 |
// E.g. |
| 2570 |
// 1. y = x & 0x7 |
2570 |
// 1. y = x & 0x7 |
| 2571 |
// 2. z = y >> 2 |
2571 |
// 2. z = y >> 2 |
| 2572 |
// 3. str x, [@x] |
2572 |
// 3. str x, [@x] |
| 2573 |
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); |
2573 |
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); |
| 2574 |
|
2574 |
|
| 2575 |
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, |
2575 |
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, |
| 2576 |
unsigned Depth) { |
2576 |
unsigned Depth) { |
| 2577 |
uint64_t Imm = |
2577 |
uint64_t Imm = |
| 2578 |
cast(Op.getOperand(1).getNode())->getZExtValue(); |
2578 |
cast(Op.getOperand(1).getNode())->getZExtValue(); |
| 2579 |
Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); |
2579 |
Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); |
| 2580 |
UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); |
2580 |
UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); |
| 2581 |
getUsefulBits(Op, UsefulBits, Depth + 1); |
2581 |
getUsefulBits(Op, UsefulBits, Depth + 1); |
| 2582 |
} |
2582 |
} |
| 2583 |
|
2583 |
|
| 2584 |
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, |
2584 |
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, |
| 2585 |
uint64_t Imm, uint64_t MSB, |
2585 |
uint64_t Imm, uint64_t MSB, |
| 2586 |
unsigned Depth) { |
2586 |
unsigned Depth) { |
| 2587 |
// inherit the bitwidth value |
2587 |
// inherit the bitwidth value |
| 2588 |
APInt OpUsefulBits(UsefulBits); |
2588 |
APInt OpUsefulBits(UsefulBits); |
| 2589 |
OpUsefulBits = 1; |
2589 |
OpUsefulBits = 1; |
| 2590 |
|
2590 |
|
| 2591 |
if (MSB >= Imm) { |
2591 |
if (MSB >= Imm) { |
| 2592 |
OpUsefulBits <<= MSB - Imm + 1; |
2592 |
OpUsefulBits <<= MSB - Imm + 1; |
| 2593 |
--OpUsefulBits; |
2593 |
--OpUsefulBits; |
| 2594 |
// The interesting part will be in the lower part of the result |
2594 |
// The interesting part will be in the lower part of the result |
| 2595 |
getUsefulBits(Op, OpUsefulBits, Depth + 1); |
2595 |
getUsefulBits(Op, OpUsefulBits, Depth + 1); |
| 2596 |
// The interesting part was starting at Imm in the argument |
2596 |
// The interesting part was starting at Imm in the argument |
| 2597 |
OpUsefulBits <<= Imm; |
2597 |
OpUsefulBits <<= Imm; |
| 2598 |
} else { |
2598 |
} else { |
| 2599 |
OpUsefulBits <<= MSB + 1; |
2599 |
OpUsefulBits <<= MSB + 1; |
| 2600 |
--OpUsefulBits; |
2600 |
--OpUsefulBits; |
| 2601 |
// The interesting part will be shifted in the result |
2601 |
// The interesting part will be shifted in the result |
| 2602 |
OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; |
2602 |
OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; |
| 2603 |
getUsefulBits(Op, OpUsefulBits, Depth + 1); |
2603 |
getUsefulBits(Op, OpUsefulBits, Depth + 1); |
| 2604 |
// The interesting part was at zero in the argument |
2604 |
// The interesting part was at zero in the argument |
| 2605 |
OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); |
2605 |
OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); |
| 2606 |
} |
2606 |
} |
| 2607 |
|
2607 |
|
| 2608 |
UsefulBits &= OpUsefulBits; |
2608 |
UsefulBits &= OpUsefulBits; |
| 2609 |
} |
2609 |
} |
| 2610 |
|
2610 |
|
| 2611 |
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, |
2611 |
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, |
| 2612 |
unsigned Depth) { |
2612 |
unsigned Depth) { |
| 2613 |
uint64_t Imm = |
2613 |
uint64_t Imm = |
| 2614 |
cast(Op.getOperand(1).getNode())->getZExtValue(); |
2614 |
cast(Op.getOperand(1).getNode())->getZExtValue(); |
| 2615 |
uint64_t MSB = |
2615 |
uint64_t MSB = |
| 2616 |
cast(Op.getOperand(2).getNode())->getZExtValue(); |
2616 |
cast(Op.getOperand(2).getNode())->getZExtValue(); |
| 2617 |
|
2617 |
|
| 2618 |
getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); |
2618 |
getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); |
| 2619 |
} |
2619 |
} |
| 2620 |
|
2620 |
|
| 2621 |
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, |
2621 |
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, |
| 2622 |
unsigned Depth) { |
2622 |
unsigned Depth) { |
| 2623 |
uint64_t ShiftTypeAndValue = |
2623 |
uint64_t ShiftTypeAndValue = |
| 2624 |
cast(Op.getOperand(2).getNode())->getZExtValue(); |
2624 |
cast(Op.getOperand(2).getNode())->getZExtValue(); |
| 2625 |
APInt Mask(UsefulBits); |
2625 |
APInt Mask(UsefulBits); |
| 2626 |
Mask.clearAllBits(); |
2626 |
Mask.clearAllBits(); |
| 2627 |
Mask.flipAllBits(); |
2627 |
Mask.flipAllBits(); |
| 2628 |
|
2628 |
|
| 2629 |
if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { |
2629 |
if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { |
| 2630 |
// Shift Left |
2630 |
// Shift Left |
| 2631 |
uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); |
2631 |
uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); |
| 2632 |
Mask <<= ShiftAmt; |
2632 |
Mask <<= ShiftAmt; |
| 2633 |
getUsefulBits(Op, Mask, Depth + 1); |
2633 |
getUsefulBits(Op, Mask, Depth + 1); |
| 2634 |
Mask.lshrInPlace(ShiftAmt); |
2634 |
Mask.lshrInPlace(ShiftAmt); |
| 2635 |
} else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { |
2635 |
} else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { |
| 2636 |
// Shift Right |
2636 |
// Shift Right |
| 2637 |
// We do not handle AArch64_AM::ASR, because the sign will change the |
2637 |
// We do not handle AArch64_AM::ASR, because the sign will change the |
| 2638 |
// number of useful bits |
2638 |
// number of useful bits |
| 2639 |
uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); |
2639 |
uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); |
| 2640 |
Mask.lshrInPlace(ShiftAmt); |
2640 |
Mask.lshrInPlace(ShiftAmt); |
| 2641 |
getUsefulBits(Op, Mask, Depth + 1); |
2641 |
getUsefulBits(Op, Mask, Depth + 1); |
| 2642 |
Mask <<= ShiftAmt; |
2642 |
Mask <<= ShiftAmt; |
| 2643 |
} else |
2643 |
} else |
| 2644 |
return; |
2644 |
return; |
| 2645 |
|
2645 |
|
| 2646 |
UsefulBits &= Mask; |
2646 |
UsefulBits &= Mask; |
| 2647 |
} |
2647 |
} |
| 2648 |
|
2648 |
|
| 2649 |
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, |
2649 |
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, |
| 2650 |
unsigned Depth) { |
2650 |
unsigned Depth) { |
| 2651 |
uint64_t Imm = |
2651 |
uint64_t Imm = |
| 2652 |
cast(Op.getOperand(2).getNode())->getZExtValue(); |
2652 |
cast(Op.getOperand(2).getNode())->getZExtValue(); |
| 2653 |
uint64_t MSB = |
2653 |
uint64_t MSB = |
| 2654 |
cast(Op.getOperand(3).getNode())->getZExtValue(); |
2654 |
cast(Op.getOperand(3).getNode())->getZExtValue(); |
| 2655 |
|
2655 |
|
| 2656 |
APInt OpUsefulBits(UsefulBits); |
2656 |
APInt OpUsefulBits(UsefulBits); |
| 2657 |
OpUsefulBits = 1; |
2657 |
OpUsefulBits = 1; |
| 2658 |
|
2658 |
|
| 2659 |
APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); |
2659 |
APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); |
| 2660 |
ResultUsefulBits.flipAllBits(); |
2660 |
ResultUsefulBits.flipAllBits(); |
| 2661 |
APInt Mask(UsefulBits.getBitWidth(), 0); |
2661 |
APInt Mask(UsefulBits.getBitWidth(), 0); |
| 2662 |
|
2662 |
|
| 2663 |
getUsefulBits(Op, ResultUsefulBits, Depth + 1); |
2663 |
getUsefulBits(Op, ResultUsefulBits, Depth + 1); |
| 2664 |
|
2664 |
|
| 2665 |
if (MSB >= Imm) { |
2665 |
if (MSB >= Imm) { |
| 2666 |
// The instruction is a BFXIL. |
2666 |
// The instruction is a BFXIL. |
| 2667 |
uint64_t Width = MSB - Imm + 1; |
2667 |
uint64_t Width = MSB - Imm + 1; |
| 2668 |
uint64_t LSB = Imm; |
2668 |
uint64_t LSB = Imm; |
| 2669 |
|
2669 |
|
| 2670 |
OpUsefulBits <<= Width; |
2670 |
OpUsefulBits <<= Width; |
| 2671 |
--OpUsefulBits; |
2671 |
--OpUsefulBits; |
| 2672 |
|
2672 |
|
| 2673 |
if (Op.getOperand(1) == Orig) { |
2673 |
if (Op.getOperand(1) == Orig) { |
| 2674 |
// Copy the low bits from the result to bits starting from LSB. |
2674 |
// Copy the low bits from the result to bits starting from LSB. |
| 2675 |
Mask = ResultUsefulBits & OpUsefulBits; |
2675 |
Mask = ResultUsefulBits & OpUsefulBits; |
| 2676 |
Mask <<= LSB; |
2676 |
Mask <<= LSB; |
| 2677 |
} |
2677 |
} |
| 2678 |
|
2678 |
|
| 2679 |
if (Op.getOperand(0) == Orig) |
2679 |
if (Op.getOperand(0) == Orig) |
| 2680 |
// Bits starting from LSB in the input contribute to the result. |
2680 |
// Bits starting from LSB in the input contribute to the result. |
| 2681 |
Mask |= (ResultUsefulBits & ~OpUsefulBits); |
2681 |
Mask |= (ResultUsefulBits & ~OpUsefulBits); |
| 2682 |
} else { |
2682 |
} else { |
| 2683 |
// The instruction is a BFI. |
2683 |
// The instruction is a BFI. |
| 2684 |
uint64_t Width = MSB + 1; |
2684 |
uint64_t Width = MSB + 1; |
| 2685 |
uint64_t LSB = UsefulBits.getBitWidth() - Imm; |
2685 |
uint64_t LSB = UsefulBits.getBitWidth() - Imm; |
| 2686 |
|
2686 |
|
| 2687 |
OpUsefulBits <<= Width; |
2687 |
OpUsefulBits <<= Width; |
| 2688 |
--OpUsefulBits; |
2688 |
--OpUsefulBits; |
| 2689 |
OpUsefulBits <<= LSB; |
2689 |
OpUsefulBits <<= LSB; |
| 2690 |
|
2690 |
|
| 2691 |
if (Op.getOperand(1) == Orig) { |
2691 |
if (Op.getOperand(1) == Orig) { |
| 2692 |
// Copy the bits from the result to the zero bits. |
2692 |
// Copy the bits from the result to the zero bits. |
| 2693 |
Mask = ResultUsefulBits & OpUsefulBits; |
2693 |
Mask = ResultUsefulBits & OpUsefulBits; |
| 2694 |
Mask.lshrInPlace(LSB); |
2694 |
Mask.lshrInPlace(LSB); |
| 2695 |
} |
2695 |
} |
| 2696 |
|
2696 |
|
| 2697 |
if (Op.getOperand(0) == Orig) |
2697 |
if (Op.getOperand(0) == Orig) |
| 2698 |
Mask |= (ResultUsefulBits & ~OpUsefulBits); |
2698 |
Mask |= (ResultUsefulBits & ~OpUsefulBits); |
| 2699 |
} |
2699 |
} |
| 2700 |
|
2700 |
|
| 2701 |
UsefulBits &= Mask; |
2701 |
UsefulBits &= Mask; |
| 2702 |
} |
2702 |
} |
| 2703 |
|
2703 |
|
| 2704 |
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, |
2704 |
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, |
| 2705 |
SDValue Orig, unsigned Depth) { |
2705 |
SDValue Orig, unsigned Depth) { |
| 2706 |
|
2706 |
|
| 2707 |
// Users of this node should have already been instruction selected |
2707 |
// Users of this node should have already been instruction selected |
| 2708 |
// FIXME: Can we turn that into an assert? |
2708 |
// FIXME: Can we turn that into an assert? |
| 2709 |
if (!UserNode->isMachineOpcode()) |
2709 |
if (!UserNode->isMachineOpcode()) |
| 2710 |
return; |
2710 |
return; |
| 2711 |
|
2711 |
|
| 2712 |
switch (UserNode->getMachineOpcode()) { |
2712 |
switch (UserNode->getMachineOpcode()) { |
| 2713 |
default: |
2713 |
default: |
| 2714 |
return; |
2714 |
return; |
| 2715 |
case AArch64::ANDSWri: |
2715 |
case AArch64::ANDSWri: |
| 2716 |
case AArch64::ANDSXri: |
2716 |
case AArch64::ANDSXri: |
| 2717 |
case AArch64::ANDWri: |
2717 |
case AArch64::ANDWri: |
| 2718 |
case AArch64::ANDXri: |
2718 |
case AArch64::ANDXri: |
| 2719 |
// We increment Depth only when we call the getUsefulBits |
2719 |
// We increment Depth only when we call the getUsefulBits |
| 2720 |
return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, |
2720 |
return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, |
| 2721 |
Depth); |
2721 |
Depth); |
| 2722 |
case AArch64::UBFMWri: |
2722 |
case AArch64::UBFMWri: |
| 2723 |
case AArch64::UBFMXri: |
2723 |
case AArch64::UBFMXri: |
| 2724 |
return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); |
2724 |
return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); |
| 2725 |
|
2725 |
|
| 2726 |
case AArch64::ORRWrs: |
2726 |
case AArch64::ORRWrs: |
| 2727 |
case AArch64::ORRXrs: |
2727 |
case AArch64::ORRXrs: |
| 2728 |
if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig) |
2728 |
if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig) |
| 2729 |
getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, |
2729 |
getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, |
| 2730 |
Depth); |
2730 |
Depth); |
| 2731 |
return; |
2731 |
return; |
| 2732 |
case AArch64::BFMWri: |
2732 |
case AArch64::BFMWri: |
| 2733 |
case AArch64::BFMXri: |
2733 |
case AArch64::BFMXri: |
| 2734 |
return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); |
2734 |
return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); |
| 2735 |
|
2735 |
|
| 2736 |
case AArch64::STRBBui: |
2736 |
case AArch64::STRBBui: |
| 2737 |
case AArch64::STURBBi: |
2737 |
case AArch64::STURBBi: |
| 2738 |
if (UserNode->getOperand(0) != Orig) |
2738 |
if (UserNode->getOperand(0) != Orig) |
| 2739 |
return; |
2739 |
return; |
| 2740 |
UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); |
2740 |
UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); |
| 2741 |
return; |
2741 |
return; |
| 2742 |
|
2742 |
|
| 2743 |
case AArch64::STRHHui: |
2743 |
case AArch64::STRHHui: |
| 2744 |
case AArch64::STURHHi: |
2744 |
case AArch64::STURHHi: |
| 2745 |
if (UserNode->getOperand(0) != Orig) |
2745 |
if (UserNode->getOperand(0) != Orig) |
| 2746 |
return; |
2746 |
return; |
| 2747 |
UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); |
2747 |
UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); |
| 2748 |
return; |
2748 |
return; |
| 2749 |
} |
2749 |
} |
| 2750 |
} |
2750 |
} |
| 2751 |
|
2751 |
|
| 2752 |
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { |
2752 |
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { |
| 2753 |
if (Depth >= SelectionDAG::MaxRecursionDepth) |
2753 |
if (Depth >= SelectionDAG::MaxRecursionDepth) |
| 2754 |
return; |
2754 |
return; |
| 2755 |
// Initialize UsefulBits |
2755 |
// Initialize UsefulBits |
| 2756 |
if (!Depth) { |
2756 |
if (!Depth) { |
| 2757 |
unsigned Bitwidth = Op.getScalarValueSizeInBits(); |
2757 |
unsigned Bitwidth = Op.getScalarValueSizeInBits(); |
| 2758 |
// At the beginning, assume every produced bits is useful |
2758 |
// At the beginning, assume every produced bits is useful |
| 2759 |
UsefulBits = APInt(Bitwidth, 0); |
2759 |
UsefulBits = APInt(Bitwidth, 0); |
| 2760 |
UsefulBits.flipAllBits(); |
2760 |
UsefulBits.flipAllBits(); |
| 2761 |
} |
2761 |
} |
| 2762 |
APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); |
2762 |
APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); |
| 2763 |
|
2763 |
|
| 2764 |
for (SDNode *Node : Op.getNode()->uses()) { |
2764 |
for (SDNode *Node : Op.getNode()->uses()) { |
| 2765 |
// A use cannot produce useful bits |
2765 |
// A use cannot produce useful bits |
| 2766 |
APInt UsefulBitsForUse = APInt(UsefulBits); |
2766 |
APInt UsefulBitsForUse = APInt(UsefulBits); |
| 2767 |
getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); |
2767 |
getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); |
| 2768 |
UsersUsefulBits |= UsefulBitsForUse; |
2768 |
UsersUsefulBits |= UsefulBitsForUse; |
| 2769 |
} |
2769 |
} |
| 2770 |
// UsefulBits contains the produced bits that are meaningful for the |
2770 |
// UsefulBits contains the produced bits that are meaningful for the |
| 2771 |
// current definition, thus a user cannot make a bit meaningful at |
2771 |
// current definition, thus a user cannot make a bit meaningful at |
| 2772 |
// this point |
2772 |
// this point |
| 2773 |
UsefulBits &= UsersUsefulBits; |
2773 |
UsefulBits &= UsersUsefulBits; |
| 2774 |
} |
2774 |
} |
| 2775 |
|
2775 |
|
| 2776 |
/// Create a machine node performing a notional SHL of Op by ShlAmount. If |
2776 |
/// Create a machine node performing a notional SHL of Op by ShlAmount. If |
| 2777 |
/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is |
2777 |
/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is |
| 2778 |
/// 0, return Op unchanged. |
2778 |
/// 0, return Op unchanged. |
| 2779 |
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { |
2779 |
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { |
| 2780 |
if (ShlAmount == 0) |
2780 |
if (ShlAmount == 0) |
| 2781 |
return Op; |
2781 |
return Op; |
| 2782 |
|
2782 |
|
| 2783 |
EVT VT = Op.getValueType(); |
2783 |
EVT VT = Op.getValueType(); |
| 2784 |
SDLoc dl(Op); |
2784 |
SDLoc dl(Op); |
| 2785 |
unsigned BitWidth = VT.getSizeInBits(); |
2785 |
unsigned BitWidth = VT.getSizeInBits(); |
| 2786 |
unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; |
2786 |
unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; |
| 2787 |
|
2787 |
|
| 2788 |
SDNode *ShiftNode; |
2788 |
SDNode *ShiftNode; |
| 2789 |
if (ShlAmount > 0) { |
2789 |
if (ShlAmount > 0) { |
| 2790 |
// LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt |
2790 |
// LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt |
| 2791 |
ShiftNode = CurDAG->getMachineNode( |
2791 |
ShiftNode = CurDAG->getMachineNode( |
| 2792 |
UBFMOpc, dl, VT, Op, |
2792 |
UBFMOpc, dl, VT, Op, |
| 2793 |
CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), |
2793 |
CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), |
| 2794 |
CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); |
2794 |
CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); |
| 2795 |
} else { |
2795 |
} else { |
| 2796 |
// LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 |
2796 |
// LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 |
| 2797 |
assert(ShlAmount < 0 && "expected right shift"); |
2797 |
assert(ShlAmount < 0 && "expected right shift"); |
| 2798 |
int ShrAmount = -ShlAmount; |
2798 |
int ShrAmount = -ShlAmount; |
| 2799 |
ShiftNode = CurDAG->getMachineNode( |
2799 |
ShiftNode = CurDAG->getMachineNode( |
| 2800 |
UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), |
2800 |
UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), |
| 2801 |
CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); |
2801 |
CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); |
| 2802 |
} |
2802 |
} |
| 2803 |
|
2803 |
|
| 2804 |
return SDValue(ShiftNode, 0); |
2804 |
return SDValue(ShiftNode, 0); |
| 2805 |
} |
2805 |
} |
| 2806 |
|
2806 |
|
| 2807 |
// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)". |
2807 |
// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)". |
| 2808 |
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, |
2808 |
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, |
| 2809 |
bool BiggerPattern, |
2809 |
bool BiggerPattern, |
| 2810 |
const uint64_t NonZeroBits, |
2810 |
const uint64_t NonZeroBits, |
| 2811 |
SDValue &Src, int &DstLSB, |
2811 |
SDValue &Src, int &DstLSB, |
| 2812 |
int &Width); |
2812 |
int &Width); |
| 2813 |
|
2813 |
|
| 2814 |
// For bit-field-positioning pattern "shl VAL, N)". |
2814 |
// For bit-field-positioning pattern "shl VAL, N)". |
| 2815 |
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, |
2815 |
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, |
| 2816 |
bool BiggerPattern, |
2816 |
bool BiggerPattern, |
| 2817 |
const uint64_t NonZeroBits, |
2817 |
const uint64_t NonZeroBits, |
| 2818 |
SDValue &Src, int &DstLSB, |
2818 |
SDValue &Src, int &DstLSB, |
| 2819 |
int &Width); |
2819 |
int &Width); |
| 2820 |
|
2820 |
|
| 2821 |
/// Does this tree qualify as an attempt to move a bitfield into position, |
2821 |
/// Does this tree qualify as an attempt to move a bitfield into position, |
| 2822 |
/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N). |
2822 |
/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N). |
| 2823 |
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, |
2823 |
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, |
| 2824 |
bool BiggerPattern, SDValue &Src, |
2824 |
bool BiggerPattern, SDValue &Src, |
| 2825 |
int &DstLSB, int &Width) { |
2825 |
int &DstLSB, int &Width) { |
| 2826 |
EVT VT = Op.getValueType(); |
2826 |
EVT VT = Op.getValueType(); |
| 2827 |
unsigned BitWidth = VT.getSizeInBits(); |
2827 |
unsigned BitWidth = VT.getSizeInBits(); |
| 2828 |
(void)BitWidth; |
2828 |
(void)BitWidth; |
| 2829 |
assert(BitWidth == 32 || BitWidth == 64); |
2829 |
assert(BitWidth == 32 || BitWidth == 64); |
| 2830 |
|
2830 |
|
| 2831 |
KnownBits Known = CurDAG->computeKnownBits(Op); |
2831 |
KnownBits Known = CurDAG->computeKnownBits(Op); |
| 2832 |
|
2832 |
|
| 2833 |
// Non-zero in the sense that they're not provably zero, which is the key |
2833 |
// Non-zero in the sense that they're not provably zero, which is the key |
| 2834 |
// point if we want to use this value |
2834 |
// point if we want to use this value |
| 2835 |
const uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); |
2835 |
const uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); |
| 2836 |
if (!isShiftedMask_64(NonZeroBits)) |
2836 |
if (!isShiftedMask_64(NonZeroBits)) |
| 2837 |
return false; |
2837 |
return false; |
| 2838 |
|
2838 |
|
| 2839 |
switch (Op.getOpcode()) { |
2839 |
switch (Op.getOpcode()) { |
| 2840 |
default: |
2840 |
default: |
| 2841 |
break; |
2841 |
break; |
| 2842 |
case ISD::AND: |
2842 |
case ISD::AND: |
| 2843 |
return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern, |
2843 |
return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern, |
| 2844 |
NonZeroBits, Src, DstLSB, Width); |
2844 |
NonZeroBits, Src, DstLSB, Width); |
| 2845 |
case ISD::SHL: |
2845 |
case ISD::SHL: |
| 2846 |
return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern, |
2846 |
return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern, |
| 2847 |
NonZeroBits, Src, DstLSB, Width); |
2847 |
NonZeroBits, Src, DstLSB, Width); |
| 2848 |
} |
2848 |
} |
| 2849 |
|
2849 |
|
| 2850 |
return false; |
2850 |
return false; |
| 2851 |
} |
2851 |
} |
| 2852 |
|
2852 |
|
| 2853 |
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, |
2853 |
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, |
| 2854 |
bool BiggerPattern, |
2854 |
bool BiggerPattern, |
| 2855 |
const uint64_t NonZeroBits, |
2855 |
const uint64_t NonZeroBits, |
| 2856 |
SDValue &Src, int &DstLSB, |
2856 |
SDValue &Src, int &DstLSB, |
| 2857 |
int &Width) { |
2857 |
int &Width) { |
| 2858 |
assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); |
2858 |
assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); |
| 2859 |
|
2859 |
|
| 2860 |
EVT VT = Op.getValueType(); |
2860 |
EVT VT = Op.getValueType(); |
| 2861 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
2861 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
| 2862 |
"Caller guarantees VT is one of i32 or i64"); |
2862 |
"Caller guarantees VT is one of i32 or i64"); |
| 2863 |
(void)VT; |
2863 |
(void)VT; |
| 2864 |
|
2864 |
|
| 2865 |
uint64_t AndImm; |
2865 |
uint64_t AndImm; |
| 2866 |
if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) |
2866 |
if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) |
| 2867 |
return false; |
2867 |
return false; |
| 2868 |
|
2868 |
|
| 2869 |
// If (~AndImm & NonZeroBits) is not zero at POS, we know that |
2869 |
// If (~AndImm & NonZeroBits) is not zero at POS, we know that |
| 2870 |
// 1) (AndImm & (1 << POS) == 0) |
2870 |
// 1) (AndImm & (1 << POS) == 0) |
| 2871 |
// 2) the result of AND is not zero at POS bit (according to NonZeroBits) |
2871 |
// 2) the result of AND is not zero at POS bit (according to NonZeroBits) |
| 2872 |
// |
2872 |
// |
| 2873 |
// 1) and 2) don't agree so something must be wrong (e.g., in |
2873 |
// 1) and 2) don't agree so something must be wrong (e.g., in |
| 2874 |
// 'SelectionDAG::computeKnownBits') |
2874 |
// 'SelectionDAG::computeKnownBits') |
| 2875 |
assert((~AndImm & NonZeroBits) == 0 && |
2875 |
assert((~AndImm & NonZeroBits) == 0 && |
| 2876 |
"Something must be wrong (e.g., in SelectionDAG::computeKnownBits)"); |
2876 |
"Something must be wrong (e.g., in SelectionDAG::computeKnownBits)"); |
| 2877 |
|
2877 |
|
| 2878 |
SDValue AndOp0 = Op.getOperand(0); |
2878 |
SDValue AndOp0 = Op.getOperand(0); |
| 2879 |
|
2879 |
|
| 2880 |
uint64_t ShlImm; |
2880 |
uint64_t ShlImm; |
| 2881 |
SDValue ShlOp0; |
2881 |
SDValue ShlOp0; |
| 2882 |
if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) { |
2882 |
if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) { |
| 2883 |
// For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'. |
2883 |
// For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'. |
| 2884 |
ShlOp0 = AndOp0.getOperand(0); |
2884 |
ShlOp0 = AndOp0.getOperand(0); |
| 2885 |
} else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND && |
2885 |
} else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND && |
| 2886 |
isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL, |
2886 |
isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL, |
| 2887 |
ShlImm)) { |
2887 |
ShlImm)) { |
| 2888 |
// For pattern "and(any_extend(shl(val, N)), shifted-mask)" |
2888 |
// For pattern "and(any_extend(shl(val, N)), shifted-mask)" |
| 2889 |
|
2889 |
|
| 2890 |
// ShlVal == shl(val, N), which is a left shift on a smaller type. |
2890 |
// ShlVal == shl(val, N), which is a left shift on a smaller type. |
| 2891 |
SDValue ShlVal = AndOp0.getOperand(0); |
2891 |
SDValue ShlVal = AndOp0.getOperand(0); |
| 2892 |
|
2892 |
|
| 2893 |
// Since this is after type legalization and ShlVal is extended to MVT::i64, |
2893 |
// Since this is after type legalization and ShlVal is extended to MVT::i64, |
| 2894 |
// expect VT to be MVT::i32. |
2894 |
// expect VT to be MVT::i32. |
| 2895 |
assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32."); |
2895 |
assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32."); |
| 2896 |
|
2896 |
|
| 2897 |
// Widens 'val' to MVT::i64 as the source of bit field positioning. |
2897 |
// Widens 'val' to MVT::i64 as the source of bit field positioning. |
| 2898 |
ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0)); |
2898 |
ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0)); |
| 2899 |
} else |
2899 |
} else |
| 2900 |
return false; |
2900 |
return false; |
| 2901 |
|
2901 |
|
| 2902 |
// For !BiggerPattern, bail out if the AndOp0 has more than one use, since |
2902 |
// For !BiggerPattern, bail out if the AndOp0 has more than one use, since |
| 2903 |
// then we'll end up generating AndOp0+UBFIZ instead of just keeping |
2903 |
// then we'll end up generating AndOp0+UBFIZ instead of just keeping |
| 2904 |
// AndOp0+AND. |
2904 |
// AndOp0+AND. |
| 2905 |
if (!BiggerPattern && !AndOp0.hasOneUse()) |
2905 |
if (!BiggerPattern && !AndOp0.hasOneUse()) |
| 2906 |
return false; |
2906 |
return false; |
| 2907 |
|
2907 |
|
| 2908 |
DstLSB = llvm::countr_zero(NonZeroBits); |
2908 |
DstLSB = llvm::countr_zero(NonZeroBits); |
| 2909 |
Width = llvm::countr_one(NonZeroBits >> DstLSB); |
2909 |
Width = llvm::countr_one(NonZeroBits >> DstLSB); |
| 2910 |
|
2910 |
|
| 2911 |
// Bail out on large Width. This happens when no proper combining / constant |
2911 |
// Bail out on large Width. This happens when no proper combining / constant |
| 2912 |
// folding was performed. |
2912 |
// folding was performed. |
| 2913 |
if (Width >= (int)VT.getSizeInBits()) { |
2913 |
if (Width >= (int)VT.getSizeInBits()) { |
| 2914 |
// If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and |
2914 |
// If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and |
| 2915 |
// Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to |
2915 |
// Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to |
| 2916 |
// "val". |
2916 |
// "val". |
| 2917 |
// If VT is i32, what Width >= 32 means: |
2917 |
// If VT is i32, what Width >= 32 means: |
| 2918 |
// - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op |
2918 |
// - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op |
| 2919 |
// demands at least 'Width' bits (after dag-combiner). This together with |
2919 |
// demands at least 'Width' bits (after dag-combiner). This together with |
| 2920 |
// `any_extend` Op (undefined higher bits) indicates missed combination |
2920 |
// `any_extend` Op (undefined higher bits) indicates missed combination |
| 2921 |
// when lowering the 'and' IR instruction to an machine IR instruction. |
2921 |
// when lowering the 'and' IR instruction to an machine IR instruction. |
| 2922 |
LLVM_DEBUG( |
2922 |
LLVM_DEBUG( |
| 2923 |
dbgs() |
2923 |
dbgs() |
| 2924 |
<< "Found large Width in bit-field-positioning -- this indicates no " |
2924 |
<< "Found large Width in bit-field-positioning -- this indicates no " |
| 2925 |
"proper combining / constant folding was performed\n"); |
2925 |
"proper combining / constant folding was performed\n"); |
| 2926 |
return false; |
2926 |
return false; |
| 2927 |
} |
2927 |
} |
| 2928 |
|
2928 |
|
| 2929 |
// BFI encompasses sufficiently many nodes that it's worth inserting an extra |
2929 |
// BFI encompasses sufficiently many nodes that it's worth inserting an extra |
| 2930 |
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL |
2930 |
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL |
| 2931 |
// amount. BiggerPattern is true when this pattern is being matched for BFI, |
2931 |
// amount. BiggerPattern is true when this pattern is being matched for BFI, |
| 2932 |
// BiggerPattern is false when this pattern is being matched for UBFIZ, in |
2932 |
// BiggerPattern is false when this pattern is being matched for UBFIZ, in |
| 2933 |
// which case it is not profitable to insert an extra shift. |
2933 |
// which case it is not profitable to insert an extra shift. |
| 2934 |
if (ShlImm != uint64_t(DstLSB) && !BiggerPattern) |
2934 |
if (ShlImm != uint64_t(DstLSB) && !BiggerPattern) |
| 2935 |
return false; |
2935 |
return false; |
| 2936 |
|
2936 |
|
| 2937 |
Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB); |
2937 |
Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB); |
| 2938 |
return true; |
2938 |
return true; |
| 2939 |
} |
2939 |
} |
| 2940 |
|
2940 |
|
| 2941 |
// For node (shl (and val, mask), N)), returns true if the node is equivalent to |
2941 |
// For node (shl (and val, mask), N)), returns true if the node is equivalent to |
| 2942 |
// UBFIZ. |
2942 |
// UBFIZ. |
| 2943 |
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, |
2943 |
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, |
| 2944 |
SDValue &Src, int &DstLSB, |
2944 |
SDValue &Src, int &DstLSB, |
| 2945 |
int &Width) { |
2945 |
int &Width) { |
| 2946 |
// Caller should have verified that N is a left shift with constant shift |
2946 |
// Caller should have verified that N is a left shift with constant shift |
| 2947 |
// amount; asserts that. |
2947 |
// amount; asserts that. |
| 2948 |
assert(Op.getOpcode() == ISD::SHL && |
2948 |
assert(Op.getOpcode() == ISD::SHL && |
| 2949 |
"Op.getNode() should be a SHL node to call this function"); |
2949 |
"Op.getNode() should be a SHL node to call this function"); |
| 2950 |
assert(isIntImmediateEq(Op.getOperand(1), ShlImm) && |
2950 |
assert(isIntImmediateEq(Op.getOperand(1), ShlImm) && |
| 2951 |
"Op.getNode() should shift ShlImm to call this function"); |
2951 |
"Op.getNode() should shift ShlImm to call this function"); |
| 2952 |
|
2952 |
|
| 2953 |
uint64_t AndImm = 0; |
2953 |
uint64_t AndImm = 0; |
| 2954 |
SDValue Op0 = Op.getOperand(0); |
2954 |
SDValue Op0 = Op.getOperand(0); |
| 2955 |
if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm)) |
2955 |
if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm)) |
| 2956 |
return false; |
2956 |
return false; |
| 2957 |
|
2957 |
|
| 2958 |
const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm); |
2958 |
const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm); |
| 2959 |
if (isMask_64(ShiftedAndImm)) { |
2959 |
if (isMask_64(ShiftedAndImm)) { |
| 2960 |
// AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm |
2960 |
// AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm |
| 2961 |
// should end with Mask, and could be prefixed with random bits if those |
2961 |
// should end with Mask, and could be prefixed with random bits if those |
| 2962 |
// bits are shifted out. |
2962 |
// bits are shifted out. |
| 2963 |
// |
2963 |
// |
| 2964 |
// For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3; |
2964 |
// For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3; |
| 2965 |
// the AND result corresponding to those bits are shifted out, so it's fine |
2965 |
// the AND result corresponding to those bits are shifted out, so it's fine |
| 2966 |
// to not extract them. |
2966 |
// to not extract them. |
| 2967 |
Width = llvm::countr_one(ShiftedAndImm); |
2967 |
Width = llvm::countr_one(ShiftedAndImm); |
| 2968 |
DstLSB = ShlImm; |
2968 |
DstLSB = ShlImm; |
| 2969 |
Src = Op0.getOperand(0); |
2969 |
Src = Op0.getOperand(0); |
| 2970 |
return true; |
2970 |
return true; |
| 2971 |
} |
2971 |
} |
| 2972 |
return false; |
2972 |
return false; |
| 2973 |
} |
2973 |
} |
| 2974 |
|
2974 |
|
| 2975 |
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, |
2975 |
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, |
| 2976 |
bool BiggerPattern, |
2976 |
bool BiggerPattern, |
| 2977 |
const uint64_t NonZeroBits, |
2977 |
const uint64_t NonZeroBits, |
| 2978 |
SDValue &Src, int &DstLSB, |
2978 |
SDValue &Src, int &DstLSB, |
| 2979 |
int &Width) { |
2979 |
int &Width) { |
| 2980 |
assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); |
2980 |
assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed"); |
| 2981 |
|
2981 |
|
| 2982 |
EVT VT = Op.getValueType(); |
2982 |
EVT VT = Op.getValueType(); |
| 2983 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
2983 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
| 2984 |
"Caller guarantees that type is i32 or i64"); |
2984 |
"Caller guarantees that type is i32 or i64"); |
| 2985 |
(void)VT; |
2985 |
(void)VT; |
| 2986 |
|
2986 |
|
| 2987 |
uint64_t ShlImm; |
2987 |
uint64_t ShlImm; |
| 2988 |
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) |
2988 |
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) |
| 2989 |
return false; |
2989 |
return false; |
| 2990 |
|
2990 |
|
| 2991 |
if (!BiggerPattern && !Op.hasOneUse()) |
2991 |
if (!BiggerPattern && !Op.hasOneUse()) |
| 2992 |
return false; |
2992 |
return false; |
| 2993 |
|
2993 |
|
| 2994 |
if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width)) |
2994 |
if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width)) |
| 2995 |
return true; |
2995 |
return true; |
| 2996 |
|
2996 |
|
| 2997 |
DstLSB = llvm::countr_zero(NonZeroBits); |
2997 |
DstLSB = llvm::countr_zero(NonZeroBits); |
| 2998 |
Width = llvm::countr_one(NonZeroBits >> DstLSB); |
2998 |
Width = llvm::countr_one(NonZeroBits >> DstLSB); |
| 2999 |
|
2999 |
|
| 3000 |
if (ShlImm != uint64_t(DstLSB) && !BiggerPattern) |
3000 |
if (ShlImm != uint64_t(DstLSB) && !BiggerPattern) |
| 3001 |
return false; |
3001 |
return false; |
| 3002 |
|
3002 |
|
| 3003 |
Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB); |
3003 |
Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB); |
| 3004 |
return true; |
3004 |
return true; |
| 3005 |
} |
3005 |
} |
| 3006 |
|
3006 |
|
| 3007 |
static bool isShiftedMask(uint64_t Mask, EVT VT) { |
3007 |
static bool isShiftedMask(uint64_t Mask, EVT VT) { |
| 3008 |
assert(VT == MVT::i32 || VT == MVT::i64); |
3008 |
assert(VT == MVT::i32 || VT == MVT::i64); |
| 3009 |
if (VT == MVT::i32) |
3009 |
if (VT == MVT::i32) |
| 3010 |
return isShiftedMask_32(Mask); |
3010 |
return isShiftedMask_32(Mask); |
| 3011 |
return isShiftedMask_64(Mask); |
3011 |
return isShiftedMask_64(Mask); |
| 3012 |
} |
3012 |
} |
| 3013 |
|
3013 |
|
| 3014 |
// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being |
3014 |
// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being |
| 3015 |
// inserted only sets known zero bits. |
3015 |
// inserted only sets known zero bits. |
| 3016 |
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { |
3016 |
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { |
| 3017 |
assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); |
3017 |
assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); |
| 3018 |
|
3018 |
|
| 3019 |
EVT VT = N->getValueType(0); |
3019 |
EVT VT = N->getValueType(0); |
| 3020 |
if (VT != MVT::i32 && VT != MVT::i64) |
3020 |
if (VT != MVT::i32 && VT != MVT::i64) |
| 3021 |
return false; |
3021 |
return false; |
| 3022 |
|
3022 |
|
| 3023 |
unsigned BitWidth = VT.getSizeInBits(); |
3023 |
unsigned BitWidth = VT.getSizeInBits(); |
| 3024 |
|
3024 |
|
| 3025 |
uint64_t OrImm; |
3025 |
uint64_t OrImm; |
| 3026 |
if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) |
3026 |
if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) |
| 3027 |
return false; |
3027 |
return false; |
| 3028 |
|
3028 |
|
| 3029 |
// Skip this transformation if the ORR immediate can be encoded in the ORR. |
3029 |
// Skip this transformation if the ORR immediate can be encoded in the ORR. |
| 3030 |
// Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely |
3030 |
// Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely |
| 3031 |
// performance neutral. |
3031 |
// performance neutral. |
| 3032 |
if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) |
3032 |
if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) |
| 3033 |
return false; |
3033 |
return false; |
| 3034 |
|
3034 |
|
| 3035 |
uint64_t MaskImm; |
3035 |
uint64_t MaskImm; |
| 3036 |
SDValue And = N->getOperand(0); |
3036 |
SDValue And = N->getOperand(0); |
| 3037 |
// Must be a single use AND with an immediate operand. |
3037 |
// Must be a single use AND with an immediate operand. |
| 3038 |
if (!And.hasOneUse() || |
3038 |
if (!And.hasOneUse() || |
| 3039 |
!isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) |
3039 |
!isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) |
| 3040 |
return false; |
3040 |
return false; |
| 3041 |
|
3041 |
|
| 3042 |
// Compute the Known Zero for the AND as this allows us to catch more general |
3042 |
// Compute the Known Zero for the AND as this allows us to catch more general |
| 3043 |
// cases than just looking for AND with imm. |
3043 |
// cases than just looking for AND with imm. |
| 3044 |
KnownBits Known = CurDAG->computeKnownBits(And); |
3044 |
KnownBits Known = CurDAG->computeKnownBits(And); |
| 3045 |
|
3045 |
|
| 3046 |
// Non-zero in the sense that they're not provably zero, which is the key |
3046 |
// Non-zero in the sense that they're not provably zero, which is the key |
| 3047 |
// point if we want to use this value. |
3047 |
// point if we want to use this value. |
| 3048 |
uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); |
3048 |
uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); |
| 3049 |
|
3049 |
|
| 3050 |
// The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). |
3050 |
// The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). |
| 3051 |
if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) |
3051 |
if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) |
| 3052 |
return false; |
3052 |
return false; |
| 3053 |
|
3053 |
|
| 3054 |
// The bits being inserted must only set those bits that are known to be zero. |
3054 |
// The bits being inserted must only set those bits that are known to be zero. |
| 3055 |
if ((OrImm & NotKnownZero) != 0) { |
3055 |
if ((OrImm & NotKnownZero) != 0) { |
| 3056 |
// FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't |
3056 |
// FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't |
| 3057 |
// currently handle this case. |
3057 |
// currently handle this case. |
| 3058 |
return false; |
3058 |
return false; |
| 3059 |
} |
3059 |
} |
| 3060 |
|
3060 |
|
| 3061 |
// BFI/BFXIL dst, src, #lsb, #width. |
3061 |
// BFI/BFXIL dst, src, #lsb, #width. |
| 3062 |
int LSB = llvm::countr_one(NotKnownZero); |
3062 |
int LSB = llvm::countr_one(NotKnownZero); |
| 3063 |
int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount(); |
3063 |
int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount(); |
| 3064 |
|
3064 |
|
| 3065 |
// BFI/BFXIL is an alias of BFM, so translate to BFM operands. |
3065 |
// BFI/BFXIL is an alias of BFM, so translate to BFM operands. |
| 3066 |
unsigned ImmR = (BitWidth - LSB) % BitWidth; |
3066 |
unsigned ImmR = (BitWidth - LSB) % BitWidth; |
| 3067 |
unsigned ImmS = Width - 1; |
3067 |
unsigned ImmS = Width - 1; |
| 3068 |
|
3068 |
|
| 3069 |
// If we're creating a BFI instruction avoid cases where we need more |
3069 |
// If we're creating a BFI instruction avoid cases where we need more |
| 3070 |
// instructions to materialize the BFI constant as compared to the original |
3070 |
// instructions to materialize the BFI constant as compared to the original |
| 3071 |
// ORR. A BFXIL will use the same constant as the original ORR, so the code |
3071 |
// ORR. A BFXIL will use the same constant as the original ORR, so the code |
| 3072 |
// should be no worse in this case. |
3072 |
// should be no worse in this case. |
| 3073 |
bool IsBFI = LSB != 0; |
3073 |
bool IsBFI = LSB != 0; |
| 3074 |
uint64_t BFIImm = OrImm >> LSB; |
3074 |
uint64_t BFIImm = OrImm >> LSB; |
| 3075 |
if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { |
3075 |
if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { |
| 3076 |
// We have a BFI instruction and we know the constant can't be materialized |
3076 |
// We have a BFI instruction and we know the constant can't be materialized |
| 3077 |
// with a ORR-immediate with the zero register. |
3077 |
// with a ORR-immediate with the zero register. |
| 3078 |
unsigned OrChunks = 0, BFIChunks = 0; |
3078 |
unsigned OrChunks = 0, BFIChunks = 0; |
| 3079 |
for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { |
3079 |
for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { |
| 3080 |
if (((OrImm >> Shift) & 0xFFFF) != 0) |
3080 |
if (((OrImm >> Shift) & 0xFFFF) != 0) |
| 3081 |
++OrChunks; |
3081 |
++OrChunks; |
| 3082 |
if (((BFIImm >> Shift) & 0xFFFF) != 0) |
3082 |
if (((BFIImm >> Shift) & 0xFFFF) != 0) |
| 3083 |
++BFIChunks; |
3083 |
++BFIChunks; |
| 3084 |
} |
3084 |
} |
| 3085 |
if (BFIChunks > OrChunks) |
3085 |
if (BFIChunks > OrChunks) |
| 3086 |
return false; |
3086 |
return false; |
| 3087 |
} |
3087 |
} |
| 3088 |
|
3088 |
|
| 3089 |
// Materialize the constant to be inserted. |
3089 |
// Materialize the constant to be inserted. |
| 3090 |
SDLoc DL(N); |
3090 |
SDLoc DL(N); |
| 3091 |
unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; |
3091 |
unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; |
| 3092 |
SDNode *MOVI = CurDAG->getMachineNode( |
3092 |
SDNode *MOVI = CurDAG->getMachineNode( |
| 3093 |
MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); |
3093 |
MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); |
| 3094 |
|
3094 |
|
| 3095 |
// Create the BFI/BFXIL instruction. |
3095 |
// Create the BFI/BFXIL instruction. |
| 3096 |
SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), |
3096 |
SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), |
| 3097 |
CurDAG->getTargetConstant(ImmR, DL, VT), |
3097 |
CurDAG->getTargetConstant(ImmR, DL, VT), |
| 3098 |
CurDAG->getTargetConstant(ImmS, DL, VT)}; |
3098 |
CurDAG->getTargetConstant(ImmS, DL, VT)}; |
| 3099 |
unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; |
3099 |
unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; |
| 3100 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
3100 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
| 3101 |
return true; |
3101 |
return true; |
| 3102 |
} |
3102 |
} |
| 3103 |
|
3103 |
|
| 3104 |
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, |
3104 |
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, |
| 3105 |
SDValue &ShiftedOperand, |
3105 |
SDValue &ShiftedOperand, |
| 3106 |
uint64_t &EncodedShiftImm) { |
3106 |
uint64_t &EncodedShiftImm) { |
| 3107 |
// Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR. |
3107 |
// Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR. |
| 3108 |
if (!Dst.hasOneUse()) |
3108 |
if (!Dst.hasOneUse()) |
| 3109 |
return false; |
3109 |
return false; |
| 3110 |
|
3110 |
|
| 3111 |
EVT VT = Dst.getValueType(); |
3111 |
EVT VT = Dst.getValueType(); |
| 3112 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
3112 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
| 3113 |
"Caller should guarantee that VT is one of i32 or i64"); |
3113 |
"Caller should guarantee that VT is one of i32 or i64"); |
| 3114 |
const unsigned SizeInBits = VT.getSizeInBits(); |
3114 |
const unsigned SizeInBits = VT.getSizeInBits(); |
| 3115 |
|
3115 |
|
| 3116 |
SDLoc DL(Dst.getNode()); |
3116 |
SDLoc DL(Dst.getNode()); |
| 3117 |
uint64_t AndImm, ShlImm; |
3117 |
uint64_t AndImm, ShlImm; |
| 3118 |
if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) && |
3118 |
if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) && |
| 3119 |
isShiftedMask_64(AndImm)) { |
3119 |
isShiftedMask_64(AndImm)) { |
| 3120 |
// Avoid transforming 'DstOp0' if it has other uses than the AND node. |
3120 |
// Avoid transforming 'DstOp0' if it has other uses than the AND node. |
| 3121 |
SDValue DstOp0 = Dst.getOperand(0); |
3121 |
SDValue DstOp0 = Dst.getOperand(0); |
| 3122 |
if (!DstOp0.hasOneUse()) |
3122 |
if (!DstOp0.hasOneUse()) |
| 3123 |
return false; |
3123 |
return false; |
| 3124 |
|
3124 |
|
| 3125 |
// An example to illustrate the transformation |
3125 |
// An example to illustrate the transformation |
| 3126 |
// From: |
3126 |
// From: |
| 3127 |
// lsr x8, x1, #1 |
3127 |
// lsr x8, x1, #1 |
| 3128 |
// and x8, x8, #0x3f80 |
3128 |
// and x8, x8, #0x3f80 |
| 3129 |
// bfxil x8, x1, #0, #7 |
3129 |
// bfxil x8, x1, #0, #7 |
| 3130 |
// To: |
3130 |
// To: |
| 3131 |
// and x8, x23, #0x7f |
3131 |
// and x8, x23, #0x7f |
| 3132 |
// ubfx x9, x23, #8, #7 |
3132 |
// ubfx x9, x23, #8, #7 |
| 3133 |
// orr x23, x8, x9, lsl #7 |
3133 |
// orr x23, x8, x9, lsl #7 |
| 3134 |
// |
3134 |
// |
| 3135 |
// The number of instructions remains the same, but ORR is faster than BFXIL |
3135 |
// The number of instructions remains the same, but ORR is faster than BFXIL |
| 3136 |
// on many AArch64 processors (or as good as BFXIL if not faster). Besides, |
3136 |
// on many AArch64 processors (or as good as BFXIL if not faster). Besides, |
| 3137 |
// the dependency chain is improved after the transformation. |
3137 |
// the dependency chain is improved after the transformation. |
| 3138 |
uint64_t SrlImm; |
3138 |
uint64_t SrlImm; |
| 3139 |
if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) { |
3139 |
if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) { |
| 3140 |
uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm); |
3140 |
uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm); |
| 3141 |
if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) { |
3141 |
if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) { |
| 3142 |
unsigned MaskWidth = |
3142 |
unsigned MaskWidth = |
| 3143 |
llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask); |
3143 |
llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask); |
| 3144 |
unsigned UBFMOpc = |
3144 |
unsigned UBFMOpc = |
| 3145 |
(VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; |
3145 |
(VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; |
| 3146 |
SDNode *UBFMNode = CurDAG->getMachineNode( |
3146 |
SDNode *UBFMNode = CurDAG->getMachineNode( |
| 3147 |
UBFMOpc, DL, VT, DstOp0.getOperand(0), |
3147 |
UBFMOpc, DL, VT, DstOp0.getOperand(0), |
| 3148 |
CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL, |
3148 |
CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL, |
| 3149 |
VT), |
3149 |
VT), |
| 3150 |
CurDAG->getTargetConstant( |
3150 |
CurDAG->getTargetConstant( |
| 3151 |
SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT)); |
3151 |
SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT)); |
| 3152 |
ShiftedOperand = SDValue(UBFMNode, 0); |
3152 |
ShiftedOperand = SDValue(UBFMNode, 0); |
| 3153 |
EncodedShiftImm = AArch64_AM::getShifterImm( |
3153 |
EncodedShiftImm = AArch64_AM::getShifterImm( |
| 3154 |
AArch64_AM::LSL, NumTrailingZeroInShiftedMask); |
3154 |
AArch64_AM::LSL, NumTrailingZeroInShiftedMask); |
| 3155 |
return true; |
3155 |
return true; |
| 3156 |
} |
3156 |
} |
| 3157 |
} |
3157 |
} |
| 3158 |
return false; |
3158 |
return false; |
| 3159 |
} |
3159 |
} |
| 3160 |
|
3160 |
|
| 3161 |
if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) { |
3161 |
if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) { |
| 3162 |
ShiftedOperand = Dst.getOperand(0); |
3162 |
ShiftedOperand = Dst.getOperand(0); |
| 3163 |
EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm); |
3163 |
EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm); |
| 3164 |
return true; |
3164 |
return true; |
| 3165 |
} |
3165 |
} |
| 3166 |
|
3166 |
|
| 3167 |
uint64_t SrlImm; |
3167 |
uint64_t SrlImm; |
| 3168 |
if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) { |
3168 |
if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) { |
| 3169 |
ShiftedOperand = Dst.getOperand(0); |
3169 |
ShiftedOperand = Dst.getOperand(0); |
| 3170 |
EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm); |
3170 |
EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm); |
| 3171 |
return true; |
3171 |
return true; |
| 3172 |
} |
3172 |
} |
| 3173 |
return false; |
3173 |
return false; |
| 3174 |
} |
3174 |
} |
| 3175 |
|
3175 |
|
| 3176 |
// Given an 'ISD::OR' node that is going to be selected as BFM, analyze |
3176 |
// Given an 'ISD::OR' node that is going to be selected as BFM, analyze |
| 3177 |
// the operands and select it to AArch64::ORR with shifted registers if |
3177 |
// the operands and select it to AArch64::ORR with shifted registers if |
| 3178 |
// that's more efficient. Returns true iff selection to AArch64::ORR happens. |
3178 |
// that's more efficient. Returns true iff selection to AArch64::ORR happens. |
| 3179 |
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, |
3179 |
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, |
| 3180 |
SDValue Src, SDValue Dst, SelectionDAG *CurDAG, |
3180 |
SDValue Src, SDValue Dst, SelectionDAG *CurDAG, |
| 3181 |
const bool BiggerPattern) { |
3181 |
const bool BiggerPattern) { |
| 3182 |
EVT VT = N->getValueType(0); |
3182 |
EVT VT = N->getValueType(0); |
| 3183 |
assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node"); |
3183 |
assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node"); |
| 3184 |
assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) || |
3184 |
assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) || |
| 3185 |
(N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) && |
3185 |
(N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) && |
| 3186 |
"Expect OrOpd0 and OrOpd1 to be operands of ISD::OR"); |
3186 |
"Expect OrOpd0 and OrOpd1 to be operands of ISD::OR"); |
| 3187 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
3187 |
assert((VT == MVT::i32 || VT == MVT::i64) && |
| 3188 |
"Expect result type to be i32 or i64 since N is combinable to BFM"); |
3188 |
"Expect result type to be i32 or i64 since N is combinable to BFM"); |
| 3189 |
SDLoc DL(N); |
3189 |
SDLoc DL(N); |
| 3190 |
|
3190 |
|
| 3191 |
// Bail out if BFM simplifies away one node in BFM Dst. |
3191 |
// Bail out if BFM simplifies away one node in BFM Dst. |
| 3192 |
if (OrOpd1 != Dst) |
3192 |
if (OrOpd1 != Dst) |
| 3193 |
return false; |
3193 |
return false; |
| 3194 |
|
3194 |
|
| 3195 |
const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; |
3195 |
const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; |
| 3196 |
// For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer |
3196 |
// For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer |
| 3197 |
// nodes from Rn (or inserts additional shift node) if BiggerPattern is true. |
3197 |
// nodes from Rn (or inserts additional shift node) if BiggerPattern is true. |
| 3198 |
if (BiggerPattern) { |
3198 |
if (BiggerPattern) { |
| 3199 |
uint64_t SrcAndImm; |
3199 |
uint64_t SrcAndImm; |
| 3200 |
if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) && |
3200 |
if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) && |
| 3201 |
isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) { |
3201 |
isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) { |
| 3202 |
// OrOpd0 = AND Src, #Mask |
3202 |
// OrOpd0 = AND Src, #Mask |
| 3203 |
// So BFM simplifies away one AND node from Src and doesn't simplify away |
3203 |
// So BFM simplifies away one AND node from Src and doesn't simplify away |
| 3204 |
// nodes from Dst. If ORR with left-shifted operand also simplifies away |
3204 |
// nodes from Dst. If ORR with left-shifted operand also simplifies away |
| 3205 |
// one node (from Rd), ORR is better since it has higher throughput and |
3205 |
// one node (from Rd), ORR is better since it has higher throughput and |
| 3206 |
// smaller latency than BFM on many AArch64 processors (and for the rest |
3206 |
// smaller latency than BFM on many AArch64 processors (and for the rest |
| 3207 |
// ORR is at least as good as BFM). |
3207 |
// ORR is at least as good as BFM). |
| 3208 |
SDValue ShiftedOperand; |
3208 |
SDValue ShiftedOperand; |
| 3209 |
uint64_t EncodedShiftImm; |
3209 |
uint64_t EncodedShiftImm; |
| 3210 |
if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand, |
3210 |
if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand, |
| 3211 |
EncodedShiftImm)) { |
3211 |
EncodedShiftImm)) { |
| 3212 |
SDValue Ops[] = {OrOpd0, ShiftedOperand, |
3212 |
SDValue Ops[] = {OrOpd0, ShiftedOperand, |
| 3213 |
CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)}; |
3213 |
CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)}; |
| 3214 |
CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); |
3214 |
CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); |
| 3215 |
return true; |
3215 |
return true; |
| 3216 |
} |
3216 |
} |
| 3217 |
} |
3217 |
} |
| 3218 |
return false; |
3218 |
return false; |
| 3219 |
} |
3219 |
} |
| 3220 |
|
3220 |
|
| 3221 |
assert((!BiggerPattern) && "BiggerPattern should be handled above"); |
3221 |
assert((!BiggerPattern) && "BiggerPattern should be handled above"); |
| 3222 |
|
3222 |
|
| 3223 |
uint64_t ShlImm; |
3223 |
uint64_t ShlImm; |
| 3224 |
if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) { |
3224 |
if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) { |
| 3225 |
if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) { |
3225 |
if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) { |
| 3226 |
SDValue Ops[] = { |
3226 |
SDValue Ops[] = { |
| 3227 |
Dst, Src, |
3227 |
Dst, Src, |
| 3228 |
CurDAG->getTargetConstant( |
3228 |
CurDAG->getTargetConstant( |
| 3229 |
AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; |
3229 |
AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; |
| 3230 |
CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); |
3230 |
CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); |
| 3231 |
return true; |
3231 |
return true; |
| 3232 |
} |
3232 |
} |
| 3233 |
|
3233 |
|
| 3234 |
// Select the following pattern to left-shifted operand rather than BFI. |
3234 |
// Select the following pattern to left-shifted operand rather than BFI. |
| 3235 |
// %val1 = op .. |
3235 |
// %val1 = op .. |
| 3236 |
// %val2 = shl %val1, #imm |
3236 |
// %val2 = shl %val1, #imm |
| 3237 |
// %res = or %val1, %val2 |
3237 |
// %res = or %val1, %val2 |
| 3238 |
// |
3238 |
// |
| 3239 |
// If N is selected to be BFI, we know that |
3239 |
// If N is selected to be BFI, we know that |
| 3240 |
// 1) OrOpd0 would be the operand from which extract bits (i.e., folded into |
3240 |
// 1) OrOpd0 would be the operand from which extract bits (i.e., folded into |
| 3241 |
// BFI) 2) OrOpd1 would be the destination operand (i.e., preserved) |
3241 |
// BFI) 2) OrOpd1 would be the destination operand (i.e., preserved) |
| 3242 |
// |
3242 |
// |
| 3243 |
// Instead of selecting N to BFI, fold OrOpd0 as a left shift directly. |
3243 |
// Instead of selecting N to BFI, fold OrOpd0 as a left shift directly. |
| 3244 |
if (OrOpd0.getOperand(0) == OrOpd1) { |
3244 |
if (OrOpd0.getOperand(0) == OrOpd1) { |
| 3245 |
SDValue Ops[] = { |
3245 |
SDValue Ops[] = { |
| 3246 |
OrOpd1, OrOpd1, |
3246 |
OrOpd1, OrOpd1, |
| 3247 |
CurDAG->getTargetConstant( |
3247 |
CurDAG->getTargetConstant( |
| 3248 |
AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; |
3248 |
AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)}; |
| 3249 |
CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); |
3249 |
CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); |
| 3250 |
return true; |
3250 |
return true; |
| 3251 |
} |
3251 |
} |
| 3252 |
} |
3252 |
} |
| 3253 |
|
3253 |
|
| 3254 |
uint64_t SrlImm; |
3254 |
uint64_t SrlImm; |
| 3255 |
if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) { |
3255 |
if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) { |
| 3256 |
// Select the following pattern to right-shifted operand rather than BFXIL. |
3256 |
// Select the following pattern to right-shifted operand rather than BFXIL. |
| 3257 |
// %val1 = op .. |
3257 |
// %val1 = op .. |
| 3258 |
// %val2 = lshr %val1, #imm |
3258 |
// %val2 = lshr %val1, #imm |
| 3259 |
// %res = or %val1, %val2 |
3259 |
// %res = or %val1, %val2 |
| 3260 |
// |
3260 |
// |
| 3261 |
// If N is selected to be BFXIL, we know that |
3261 |
// If N is selected to be BFXIL, we know that |
| 3262 |
// 1) OrOpd0 would be the operand from which extract bits (i.e., folded into |
3262 |
// 1) OrOpd0 would be the operand from which extract bits (i.e., folded into |
| 3263 |
// BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved) |
3263 |
// BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved) |
| 3264 |
// |
3264 |
// |
| 3265 |
// Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly. |
3265 |
// Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly. |
| 3266 |
if (OrOpd0.getOperand(0) == OrOpd1) { |
3266 |
if (OrOpd0.getOperand(0) == OrOpd1) { |
| 3267 |
SDValue Ops[] = { |
3267 |
SDValue Ops[] = { |
| 3268 |
OrOpd1, OrOpd1, |
3268 |
OrOpd1, OrOpd1, |
| 3269 |
CurDAG->getTargetConstant( |
3269 |
CurDAG->getTargetConstant( |
| 3270 |
AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)}; |
3270 |
AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)}; |
| 3271 |
CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); |
3271 |
CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); |
| 3272 |
return true; |
3272 |
return true; |
| 3273 |
} |
3273 |
} |
| 3274 |
} |
3274 |
} |
| 3275 |
|
3275 |
|
| 3276 |
return false; |
3276 |
return false; |
| 3277 |
} |
3277 |
} |
| 3278 |
|
3278 |
|
| 3279 |
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, |
3279 |
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, |
| 3280 |
SelectionDAG *CurDAG) { |
3280 |
SelectionDAG *CurDAG) { |
| 3281 |
assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); |
3281 |
assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); |
| 3282 |
|
3282 |
|
| 3283 |
EVT VT = N->getValueType(0); |
3283 |
EVT VT = N->getValueType(0); |
| 3284 |
if (VT != MVT::i32 && VT != MVT::i64) |
3284 |
if (VT != MVT::i32 && VT != MVT::i64) |
| 3285 |
return false; |
3285 |
return false; |
| 3286 |
|
3286 |
|
| 3287 |
unsigned BitWidth = VT.getSizeInBits(); |
3287 |
unsigned BitWidth = VT.getSizeInBits(); |
| 3288 |
|
3288 |
|
| 3289 |
// Because of simplify-demanded-bits in DAGCombine, involved masks may not |
3289 |
// Because of simplify-demanded-bits in DAGCombine, involved masks may not |
| 3290 |
// have the expected shape. Try to undo that. |
3290 |
// have the expected shape. Try to undo that. |
| 3291 |
|
3291 |
|
| 3292 |
unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero(); |
3292 |
unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero(); |
| 3293 |
unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero(); |
3293 |
unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero(); |
| 3294 |
|
3294 |
|
| 3295 |
// Given a OR operation, check if we have the following pattern |
3295 |
// Given a OR operation, check if we have the following pattern |
| 3296 |
// ubfm c, b, imm, imm2 (or something that does the same jobs, see |
3296 |
// ubfm c, b, imm, imm2 (or something that does the same jobs, see |
| 3297 |
// isBitfieldExtractOp) |
3297 |
// isBitfieldExtractOp) |
| 3298 |
// d = e & mask2 ; where mask is a binary sequence of 1..10..0 and |
3298 |
// d = e & mask2 ; where mask is a binary sequence of 1..10..0 and |
| 3299 |
// countTrailingZeros(mask2) == imm2 - imm + 1 |
3299 |
// countTrailingZeros(mask2) == imm2 - imm + 1 |
| 3300 |
// f = d | c |
3300 |
// f = d | c |
| 3301 |
// if yes, replace the OR instruction with: |
3301 |
// if yes, replace the OR instruction with: |
| 3302 |
// f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2 |
3302 |
// f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2 |
| 3303 |
|
3303 |
|
| 3304 |
// OR is commutative, check all combinations of operand order and values of |
3304 |
// OR is commutative, check all combinations of operand order and values of |
| 3305 |
// BiggerPattern, i.e. |
3305 |
// BiggerPattern, i.e. |
| 3306 |
// Opd0, Opd1, BiggerPattern=false |
3306 |
// Opd0, Opd1, BiggerPattern=false |
| 3307 |
// Opd1, Opd0, BiggerPattern=false |
3307 |
// Opd1, Opd0, BiggerPattern=false |
| 3308 |
// Opd0, Opd1, BiggerPattern=true |
3308 |
// Opd0, Opd1, BiggerPattern=true |
| 3309 |
// Opd1, Opd0, BiggerPattern=true |
3309 |
// Opd1, Opd0, BiggerPattern=true |
| 3310 |
// Several of these combinations may match, so check with BiggerPattern=false |
3310 |
// Several of these combinations may match, so check with BiggerPattern=false |
| 3311 |
// first since that will produce better results by matching more instructions |
3311 |
// first since that will produce better results by matching more instructions |
| 3312 |
// and/or inserting fewer extra instructions. |
3312 |
// and/or inserting fewer extra instructions. |
| 3313 |
for (int I = 0; I < 4; ++I) { |
3313 |
for (int I = 0; I < 4; ++I) { |
| 3314 |
|
3314 |
|
| 3315 |
SDValue Dst, Src; |
3315 |
SDValue Dst, Src; |
| 3316 |
unsigned ImmR, ImmS; |
3316 |
unsigned ImmR, ImmS; |
| 3317 |
bool BiggerPattern = I / 2; |
3317 |
bool BiggerPattern = I / 2; |
| 3318 |
SDValue OrOpd0Val = N->getOperand(I % 2); |
3318 |
SDValue OrOpd0Val = N->getOperand(I % 2); |
| 3319 |
SDNode *OrOpd0 = OrOpd0Val.getNode(); |
3319 |
SDNode *OrOpd0 = OrOpd0Val.getNode(); |
| 3320 |
SDValue OrOpd1Val = N->getOperand((I + 1) % 2); |
3320 |
SDValue OrOpd1Val = N->getOperand((I + 1) % 2); |
| 3321 |
SDNode *OrOpd1 = OrOpd1Val.getNode(); |
3321 |
SDNode *OrOpd1 = OrOpd1Val.getNode(); |
| 3322 |
|
3322 |
|
| 3323 |
unsigned BFXOpc; |
3323 |
unsigned BFXOpc; |
| 3324 |
int DstLSB, Width; |
3324 |
int DstLSB, Width; |
| 3325 |
if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, |
3325 |
if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, |
| 3326 |
NumberOfIgnoredLowBits, BiggerPattern)) { |
3326 |
NumberOfIgnoredLowBits, BiggerPattern)) { |
| 3327 |
// Check that the returned opcode is compatible with the pattern, |
3327 |
// Check that the returned opcode is compatible with the pattern, |
| 3328 |
// i.e., same type and zero extended (U and not S) |
3328 |
// i.e., same type and zero extended (U and not S) |
| 3329 |
if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || |
3329 |
if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || |
| 3330 |
(BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) |
3330 |
(BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) |
| 3331 |
continue; |
3331 |
continue; |
| 3332 |
|
3332 |
|
| 3333 |
// Compute the width of the bitfield insertion |
3333 |
// Compute the width of the bitfield insertion |
| 3334 |
DstLSB = 0; |
3334 |
DstLSB = 0; |
| 3335 |
Width = ImmS - ImmR + 1; |
3335 |
Width = ImmS - ImmR + 1; |
| 3336 |
// FIXME: This constraint is to catch bitfield insertion we may |
3336 |
// FIXME: This constraint is to catch bitfield insertion we may |
| 3337 |
// want to widen the pattern if we want to grab general bitfied |
3337 |
// want to widen the pattern if we want to grab general bitfied |
| 3338 |
// move case |
3338 |
// move case |
| 3339 |
if (Width <= 0) |
3339 |
if (Width <= 0) |
| 3340 |
continue; |
3340 |
continue; |
| 3341 |
|
3341 |
|
| 3342 |
// If the mask on the insertee is correct, we have a BFXIL operation. We |
3342 |
// If the mask on the insertee is correct, we have a BFXIL operation. We |
| 3343 |
// can share the ImmR and ImmS values from the already-computed UBFM. |
3343 |
// can share the ImmR and ImmS values from the already-computed UBFM. |
| 3344 |
} else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, |
3344 |
} else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, |
| 3345 |
BiggerPattern, |
3345 |
BiggerPattern, |
| 3346 |
Src, DstLSB, Width)) { |
3346 |
Src, DstLSB, Width)) { |
| 3347 |
ImmR = (BitWidth - DstLSB) % BitWidth; |
3347 |
ImmR = (BitWidth - DstLSB) % BitWidth; |
| 3348 |
ImmS = Width - 1; |
3348 |
ImmS = Width - 1; |
| 3349 |
} else |
3349 |
} else |
| 3350 |
continue; |
3350 |
continue; |
| 3351 |
|
3351 |
|
| 3352 |
// Check the second part of the pattern |
3352 |
// Check the second part of the pattern |
| 3353 |
EVT VT = OrOpd1Val.getValueType(); |
3353 |
EVT VT = OrOpd1Val.getValueType(); |
| 3354 |
assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); |
3354 |
assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); |
| 3355 |
|
3355 |
|
| 3356 |
// Compute the Known Zero for the candidate of the first operand. |
3356 |
// Compute the Known Zero for the candidate of the first operand. |
| 3357 |
// This allows to catch more general case than just looking for |
3357 |
// This allows to catch more general case than just looking for |
| 3358 |
// AND with imm. Indeed, simplify-demanded-bits may have removed |
3358 |
// AND with imm. Indeed, simplify-demanded-bits may have removed |
| 3359 |
// the AND instruction because it proves it was useless. |
3359 |
// the AND instruction because it proves it was useless. |
| 3360 |
KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); |
3360 |
KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); |
| 3361 |
|
3361 |
|
| 3362 |
// Check if there is enough room for the second operand to appear |
3362 |
// Check if there is enough room for the second operand to appear |
| 3363 |
// in the first one |
3363 |
// in the first one |
| 3364 |
APInt BitsToBeInserted = |
3364 |
APInt BitsToBeInserted = |
| 3365 |
APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); |
3365 |
APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); |
| 3366 |
|
3366 |
|
| 3367 |
if ((BitsToBeInserted & ~Known.Zero) != 0) |
3367 |
if ((BitsToBeInserted & ~Known.Zero) != 0) |
| 3368 |
continue; |
3368 |
continue; |
| 3369 |
|
3369 |
|
| 3370 |
// Set the first operand |
3370 |
// Set the first operand |
| 3371 |
uint64_t Imm; |
3371 |
uint64_t Imm; |
| 3372 |
if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && |
3372 |
if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && |
| 3373 |
isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) |
3373 |
isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) |
| 3374 |
// In that case, we can eliminate the AND |
3374 |
// In that case, we can eliminate the AND |
| 3375 |
Dst = OrOpd1->getOperand(0); |
3375 |
Dst = OrOpd1->getOperand(0); |
| 3376 |
else |
3376 |
else |
| 3377 |
// Maybe the AND has been removed by simplify-demanded-bits |
3377 |
// Maybe the AND has been removed by simplify-demanded-bits |
| 3378 |
// or is useful because it discards more bits |
3378 |
// or is useful because it discards more bits |
| 3379 |
Dst = OrOpd1Val; |
3379 |
Dst = OrOpd1Val; |
| 3380 |
|
3380 |
|
| 3381 |
// Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR |
3381 |
// Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR |
| 3382 |
// with shifted operand is more efficient. |
3382 |
// with shifted operand is more efficient. |
| 3383 |
if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG, |
3383 |
if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG, |
| 3384 |
BiggerPattern)) |
3384 |
BiggerPattern)) |
| 3385 |
return true; |
3385 |
return true; |
| 3386 |
|
3386 |
|
| 3387 |
// both parts match |
3387 |
// both parts match |
| 3388 |
SDLoc DL(N); |
3388 |
SDLoc DL(N); |
| 3389 |
SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), |
3389 |
SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), |
| 3390 |
CurDAG->getTargetConstant(ImmS, DL, VT)}; |
3390 |
CurDAG->getTargetConstant(ImmS, DL, VT)}; |
| 3391 |
unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; |
3391 |
unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; |
| 3392 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
3392 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
| 3393 |
return true; |
3393 |
return true; |
| 3394 |
} |
3394 |
} |
| 3395 |
|
3395 |
|
| 3396 |
// Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff |
3396 |
// Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff |
| 3397 |
// Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted |
3397 |
// Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted |
| 3398 |
// mask (e.g., 0x000ffff0). |
3398 |
// mask (e.g., 0x000ffff0). |
| 3399 |
uint64_t Mask0Imm, Mask1Imm; |
3399 |
uint64_t Mask0Imm, Mask1Imm; |
| 3400 |
SDValue And0 = N->getOperand(0); |
3400 |
SDValue And0 = N->getOperand(0); |
| 3401 |
SDValue And1 = N->getOperand(1); |
3401 |
SDValue And1 = N->getOperand(1); |
| 3402 |
if (And0.hasOneUse() && And1.hasOneUse() && |
3402 |
if (And0.hasOneUse() && And1.hasOneUse() && |
| 3403 |
isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && |
3403 |
isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && |
| 3404 |
isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && |
3404 |
isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && |
| 3405 |
APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && |
3405 |
APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && |
| 3406 |
(isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { |
3406 |
(isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { |
| 3407 |
|
3407 |
|
| 3408 |
// ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), |
3408 |
// ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), |
| 3409 |
// (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the |
3409 |
// (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the |
| 3410 |
// bits to be inserted. |
3410 |
// bits to be inserted. |
| 3411 |
if (isShiftedMask(Mask0Imm, VT)) { |
3411 |
if (isShiftedMask(Mask0Imm, VT)) { |
| 3412 |
std::swap(And0, And1); |
3412 |
std::swap(And0, And1); |
| 3413 |
std::swap(Mask0Imm, Mask1Imm); |
3413 |
std::swap(Mask0Imm, Mask1Imm); |
| 3414 |
} |
3414 |
} |
| 3415 |
|
3415 |
|
| 3416 |
SDValue Src = And1->getOperand(0); |
3416 |
SDValue Src = And1->getOperand(0); |
| 3417 |
SDValue Dst = And0->getOperand(0); |
3417 |
SDValue Dst = And0->getOperand(0); |
| 3418 |
unsigned LSB = llvm::countr_zero(Mask1Imm); |
3418 |
unsigned LSB = llvm::countr_zero(Mask1Imm); |
| 3419 |
int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount(); |
3419 |
int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount(); |
| 3420 |
|
3420 |
|
| 3421 |
// The BFXIL inserts the low-order bits from a source register, so right |
3421 |
// The BFXIL inserts the low-order bits from a source register, so right |
| 3422 |
// shift the needed bits into place. |
3422 |
// shift the needed bits into place. |
| 3423 |
SDLoc DL(N); |
3423 |
SDLoc DL(N); |
| 3424 |
unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; |
3424 |
unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; |
| 3425 |
uint64_t LsrImm = LSB; |
3425 |
uint64_t LsrImm = LSB; |
| 3426 |
if (Src->hasOneUse() && |
3426 |
if (Src->hasOneUse() && |
| 3427 |
isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) && |
3427 |
isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) && |
| 3428 |
(LsrImm + LSB) < BitWidth) { |
3428 |
(LsrImm + LSB) < BitWidth) { |
| 3429 |
Src = Src->getOperand(0); |
3429 |
Src = Src->getOperand(0); |
| 3430 |
LsrImm += LSB; |
3430 |
LsrImm += LSB; |
| 3431 |
} |
3431 |
} |
| 3432 |
|
3432 |
|
| 3433 |
SDNode *LSR = CurDAG->getMachineNode( |
3433 |
SDNode *LSR = CurDAG->getMachineNode( |
| 3434 |
ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT), |
3434 |
ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT), |
| 3435 |
CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); |
3435 |
CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); |
| 3436 |
|
3436 |
|
| 3437 |
// BFXIL is an alias of BFM, so translate to BFM operands. |
3437 |
// BFXIL is an alias of BFM, so translate to BFM operands. |
| 3438 |
unsigned ImmR = (BitWidth - LSB) % BitWidth; |
3438 |
unsigned ImmR = (BitWidth - LSB) % BitWidth; |
| 3439 |
unsigned ImmS = Width - 1; |
3439 |
unsigned ImmS = Width - 1; |
| 3440 |
|
3440 |
|
| 3441 |
// Create the BFXIL instruction. |
3441 |
// Create the BFXIL instruction. |
| 3442 |
SDValue Ops[] = {Dst, SDValue(LSR, 0), |
3442 |
SDValue Ops[] = {Dst, SDValue(LSR, 0), |
| 3443 |
CurDAG->getTargetConstant(ImmR, DL, VT), |
3443 |
CurDAG->getTargetConstant(ImmR, DL, VT), |
| 3444 |
CurDAG->getTargetConstant(ImmS, DL, VT)}; |
3444 |
CurDAG->getTargetConstant(ImmS, DL, VT)}; |
| 3445 |
unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; |
3445 |
unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; |
| 3446 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
3446 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
| 3447 |
return true; |
3447 |
return true; |
| 3448 |
} |
3448 |
} |
| 3449 |
|
3449 |
|
| 3450 |
return false; |
3450 |
return false; |
| 3451 |
} |
3451 |
} |
| 3452 |
|
3452 |
|
| 3453 |
bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { |
3453 |
bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { |
| 3454 |
if (N->getOpcode() != ISD::OR) |
3454 |
if (N->getOpcode() != ISD::OR) |
| 3455 |
return false; |
3455 |
return false; |
| 3456 |
|
3456 |
|
| 3457 |
APInt NUsefulBits; |
3457 |
APInt NUsefulBits; |
| 3458 |
getUsefulBits(SDValue(N, 0), NUsefulBits); |
3458 |
getUsefulBits(SDValue(N, 0), NUsefulBits); |
| 3459 |
|
3459 |
|
| 3460 |
// If all bits are not useful, just return UNDEF. |
3460 |
// If all bits are not useful, just return UNDEF. |
| 3461 |
if (!NUsefulBits) { |
3461 |
if (!NUsefulBits) { |
| 3462 |
CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); |
3462 |
CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); |
| 3463 |
return true; |
3463 |
return true; |
| 3464 |
} |
3464 |
} |
| 3465 |
|
3465 |
|
| 3466 |
if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) |
3466 |
if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) |
| 3467 |
return true; |
3467 |
return true; |
| 3468 |
|
3468 |
|
| 3469 |
return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); |
3469 |
return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); |
| 3470 |
} |
3470 |
} |
| 3471 |
|
3471 |
|
| 3472 |
/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the |
3472 |
/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the |
| 3473 |
/// equivalent of a left shift by a constant amount followed by an and masking |
3473 |
/// equivalent of a left shift by a constant amount followed by an and masking |
| 3474 |
/// out a contiguous set of bits. |
3474 |
/// out a contiguous set of bits. |
| 3475 |
bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { |
3475 |
bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { |
| 3476 |
if (N->getOpcode() != ISD::AND) |
3476 |
if (N->getOpcode() != ISD::AND) |
| 3477 |
return false; |
3477 |
return false; |
| 3478 |
|
3478 |
|
| 3479 |
EVT VT = N->getValueType(0); |
3479 |
EVT VT = N->getValueType(0); |
| 3480 |
if (VT != MVT::i32 && VT != MVT::i64) |
3480 |
if (VT != MVT::i32 && VT != MVT::i64) |
| 3481 |
return false; |
3481 |
return false; |
| 3482 |
|
3482 |
|
| 3483 |
SDValue Op0; |
3483 |
SDValue Op0; |
| 3484 |
int DstLSB, Width; |
3484 |
int DstLSB, Width; |
| 3485 |
if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, |
3485 |
if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, |
| 3486 |
Op0, DstLSB, Width)) |
3486 |
Op0, DstLSB, Width)) |
| 3487 |
return false; |
3487 |
return false; |
| 3488 |
|
3488 |
|
| 3489 |
// ImmR is the rotate right amount. |
3489 |
// ImmR is the rotate right amount. |
| 3490 |
unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); |
3490 |
unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); |
| 3491 |
// ImmS is the most significant bit of the source to be moved. |
3491 |
// ImmS is the most significant bit of the source to be moved. |
| 3492 |
unsigned ImmS = Width - 1; |
3492 |
unsigned ImmS = Width - 1; |
| 3493 |
|
3493 |
|
| 3494 |
SDLoc DL(N); |
3494 |
SDLoc DL(N); |
| 3495 |
SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), |
3495 |
SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), |
| 3496 |
CurDAG->getTargetConstant(ImmS, DL, VT)}; |
3496 |
CurDAG->getTargetConstant(ImmS, DL, VT)}; |
| 3497 |
unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; |
3497 |
unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; |
| 3498 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
3498 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
| 3499 |
return true; |
3499 |
return true; |
| 3500 |
} |
3500 |
} |
| 3501 |
|
3501 |
|
| 3502 |
/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in |
3502 |
/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in |
| 3503 |
/// variable shift/rotate instructions. |
3503 |
/// variable shift/rotate instructions. |
| 3504 |
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { |
3504 |
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { |
| 3505 |
EVT VT = N->getValueType(0); |
3505 |
EVT VT = N->getValueType(0); |
| 3506 |
|
3506 |
|
| 3507 |
unsigned Opc; |
3507 |
unsigned Opc; |
| 3508 |
switch (N->getOpcode()) { |
3508 |
switch (N->getOpcode()) { |
| 3509 |
case ISD::ROTR: |
3509 |
case ISD::ROTR: |
| 3510 |
Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; |
3510 |
Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; |
| 3511 |
break; |
3511 |
break; |
| 3512 |
case ISD::SHL: |
3512 |
case ISD::SHL: |
| 3513 |
Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; |
3513 |
Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; |
| 3514 |
break; |
3514 |
break; |
| 3515 |
case ISD::SRL: |
3515 |
case ISD::SRL: |
| 3516 |
Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; |
3516 |
Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; |
| 3517 |
break; |
3517 |
break; |
| 3518 |
case ISD::SRA: |
3518 |
case ISD::SRA: |
| 3519 |
Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; |
3519 |
Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; |
| 3520 |
break; |
3520 |
break; |
| 3521 |
default: |
3521 |
default: |
| 3522 |
return false; |
3522 |
return false; |
| 3523 |
} |
3523 |
} |
| 3524 |
|
3524 |
|
| 3525 |
uint64_t Size; |
3525 |
uint64_t Size; |
| 3526 |
uint64_t Bits; |
3526 |
uint64_t Bits; |
| 3527 |
if (VT == MVT::i32) { |
3527 |
if (VT == MVT::i32) { |
| 3528 |
Bits = 5; |
3528 |
Bits = 5; |
| 3529 |
Size = 32; |
3529 |
Size = 32; |
| 3530 |
} else if (VT == MVT::i64) { |
3530 |
} else if (VT == MVT::i64) { |
| 3531 |
Bits = 6; |
3531 |
Bits = 6; |
| 3532 |
Size = 64; |
3532 |
Size = 64; |
| 3533 |
} else |
3533 |
} else |
| 3534 |
return false; |
3534 |
return false; |
| 3535 |
|
3535 |
|
| 3536 |
SDValue ShiftAmt = N->getOperand(1); |
3536 |
SDValue ShiftAmt = N->getOperand(1); |
| 3537 |
SDLoc DL(N); |
3537 |
SDLoc DL(N); |
| 3538 |
SDValue NewShiftAmt; |
3538 |
SDValue NewShiftAmt; |
| 3539 |
|
3539 |
|
| 3540 |
// Skip over an extend of the shift amount. |
3540 |
// Skip over an extend of the shift amount. |
| 3541 |
if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || |
3541 |
if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || |
| 3542 |
ShiftAmt->getOpcode() == ISD::ANY_EXTEND) |
3542 |
ShiftAmt->getOpcode() == ISD::ANY_EXTEND) |
| 3543 |
ShiftAmt = ShiftAmt->getOperand(0); |
3543 |
ShiftAmt = ShiftAmt->getOperand(0); |
| 3544 |
|
3544 |
|
| 3545 |
if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { |
3545 |
if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { |
| 3546 |
SDValue Add0 = ShiftAmt->getOperand(0); |
3546 |
SDValue Add0 = ShiftAmt->getOperand(0); |
| 3547 |
SDValue Add1 = ShiftAmt->getOperand(1); |
3547 |
SDValue Add1 = ShiftAmt->getOperand(1); |
| 3548 |
uint64_t Add0Imm; |
3548 |
uint64_t Add0Imm; |
| 3549 |
uint64_t Add1Imm; |
3549 |
uint64_t Add1Imm; |
| 3550 |
if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) { |
3550 |
if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) { |
| 3551 |
// If we are shifting by X+/-N where N == 0 mod Size, then just shift by X |
3551 |
// If we are shifting by X+/-N where N == 0 mod Size, then just shift by X |
| 3552 |
// to avoid the ADD/SUB. |
3552 |
// to avoid the ADD/SUB. |
| 3553 |
NewShiftAmt = Add0; |
3553 |
NewShiftAmt = Add0; |
| 3554 |
} else if (ShiftAmt->getOpcode() == ISD::SUB && |
3554 |
} else if (ShiftAmt->getOpcode() == ISD::SUB && |
| 3555 |
isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && |
3555 |
isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && |
| 3556 |
(Add0Imm % Size == 0)) { |
3556 |
(Add0Imm % Size == 0)) { |
| 3557 |
// If we are shifting by N-X where N == 0 mod Size, then just shift by -X |
3557 |
// If we are shifting by N-X where N == 0 mod Size, then just shift by -X |
| 3558 |
// to generate a NEG instead of a SUB from a constant. |
3558 |
// to generate a NEG instead of a SUB from a constant. |
| 3559 |
unsigned NegOpc; |
3559 |
unsigned NegOpc; |
| 3560 |
unsigned ZeroReg; |
3560 |
unsigned ZeroReg; |
| 3561 |
EVT SubVT = ShiftAmt->getValueType(0); |
3561 |
EVT SubVT = ShiftAmt->getValueType(0); |
| 3562 |
if (SubVT == MVT::i32) { |
3562 |
if (SubVT == MVT::i32) { |
| 3563 |
NegOpc = AArch64::SUBWrr; |
3563 |
NegOpc = AArch64::SUBWrr; |
| 3564 |
ZeroReg = AArch64::WZR; |
3564 |
ZeroReg = AArch64::WZR; |
| 3565 |
} else { |
3565 |
} else { |
| 3566 |
assert(SubVT == MVT::i64); |
3566 |
assert(SubVT == MVT::i64); |
| 3567 |
NegOpc = AArch64::SUBXrr; |
3567 |
NegOpc = AArch64::SUBXrr; |
| 3568 |
ZeroReg = AArch64::XZR; |
3568 |
ZeroReg = AArch64::XZR; |
| 3569 |
} |
3569 |
} |
| 3570 |
SDValue Zero = |
3570 |
SDValue Zero = |
| 3571 |
CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); |
3571 |
CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); |
| 3572 |
MachineSDNode *Neg = |
3572 |
MachineSDNode *Neg = |
| 3573 |
CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); |
3573 |
CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); |
| 3574 |
NewShiftAmt = SDValue(Neg, 0); |
3574 |
NewShiftAmt = SDValue(Neg, 0); |
| 3575 |
} else if (ShiftAmt->getOpcode() == ISD::SUB && |
3575 |
} else if (ShiftAmt->getOpcode() == ISD::SUB && |
| 3576 |
isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) { |
3576 |
isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) { |
| 3577 |
// If we are shifting by N-X where N == -1 mod Size, then just shift by ~X |
3577 |
// If we are shifting by N-X where N == -1 mod Size, then just shift by ~X |
| 3578 |
// to generate a NOT instead of a SUB from a constant. |
3578 |
// to generate a NOT instead of a SUB from a constant. |
| 3579 |
unsigned NotOpc; |
3579 |
unsigned NotOpc; |
| 3580 |
unsigned ZeroReg; |
3580 |
unsigned ZeroReg; |
| 3581 |
EVT SubVT = ShiftAmt->getValueType(0); |
3581 |
EVT SubVT = ShiftAmt->getValueType(0); |
| 3582 |
if (SubVT == MVT::i32) { |
3582 |
if (SubVT == MVT::i32) { |
| 3583 |
NotOpc = AArch64::ORNWrr; |
3583 |
NotOpc = AArch64::ORNWrr; |
| 3584 |
ZeroReg = AArch64::WZR; |
3584 |
ZeroReg = AArch64::WZR; |
| 3585 |
} else { |
3585 |
} else { |
| 3586 |
assert(SubVT == MVT::i64); |
3586 |
assert(SubVT == MVT::i64); |
| 3587 |
NotOpc = AArch64::ORNXrr; |
3587 |
NotOpc = AArch64::ORNXrr; |
| 3588 |
ZeroReg = AArch64::XZR; |
3588 |
ZeroReg = AArch64::XZR; |
| 3589 |
} |
3589 |
} |
| 3590 |
SDValue Zero = |
3590 |
SDValue Zero = |
| 3591 |
CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); |
3591 |
CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); |
| 3592 |
MachineSDNode *Not = |
3592 |
MachineSDNode *Not = |
| 3593 |
CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1); |
3593 |
CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1); |
| 3594 |
NewShiftAmt = SDValue(Not, 0); |
3594 |
NewShiftAmt = SDValue(Not, 0); |
| 3595 |
} else |
3595 |
} else |
| 3596 |
return false; |
3596 |
return false; |
| 3597 |
} else { |
3597 |
} else { |
| 3598 |
// If the shift amount is masked with an AND, check that the mask covers the |
3598 |
// If the shift amount is masked with an AND, check that the mask covers the |
| 3599 |
// bits that are implicitly ANDed off by the above opcodes and if so, skip |
3599 |
// bits that are implicitly ANDed off by the above opcodes and if so, skip |
| 3600 |
// the AND. |
3600 |
// the AND. |
| 3601 |
uint64_t MaskImm; |
3601 |
uint64_t MaskImm; |
| 3602 |
if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && |
3602 |
if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && |
| 3603 |
!isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) |
3603 |
!isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) |
| 3604 |
return false; |
3604 |
return false; |
| 3605 |
|
3605 |
|
| 3606 |
if ((unsigned)llvm::countr_one(MaskImm) < Bits) |
3606 |
if ((unsigned)llvm::countr_one(MaskImm) < Bits) |
| 3607 |
return false; |
3607 |
return false; |
| 3608 |
|
3608 |
|
| 3609 |
NewShiftAmt = ShiftAmt->getOperand(0); |
3609 |
NewShiftAmt = ShiftAmt->getOperand(0); |
| 3610 |
} |
3610 |
} |
| 3611 |
|
3611 |
|
| 3612 |
// Narrow/widen the shift amount to match the size of the shift operation. |
3612 |
// Narrow/widen the shift amount to match the size of the shift operation. |
| 3613 |
if (VT == MVT::i32) |
3613 |
if (VT == MVT::i32) |
| 3614 |
NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); |
3614 |
NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); |
| 3615 |
else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { |
3615 |
else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { |
| 3616 |
SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); |
3616 |
SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); |
| 3617 |
MachineSDNode *Ext = CurDAG->getMachineNode( |
3617 |
MachineSDNode *Ext = CurDAG->getMachineNode( |
| 3618 |
AArch64::SUBREG_TO_REG, DL, VT, |
3618 |
AArch64::SUBREG_TO_REG, DL, VT, |
| 3619 |
CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); |
3619 |
CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); |
| 3620 |
NewShiftAmt = SDValue(Ext, 0); |
3620 |
NewShiftAmt = SDValue(Ext, 0); |
| 3621 |
} |
3621 |
} |
| 3622 |
|
3622 |
|
| 3623 |
SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; |
3623 |
SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; |
| 3624 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
3624 |
CurDAG->SelectNodeTo(N, Opc, VT, Ops); |
| 3625 |
return true; |
3625 |
return true; |
| 3626 |
} |
3626 |
} |
| 3627 |
|
3627 |
|
| 3628 |
bool |
3628 |
bool |
| 3629 |
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, |
3629 |
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, |
| 3630 |
unsigned RegWidth) { |
3630 |
unsigned RegWidth) { |
| 3631 |
APFloat FVal(0.0); |
3631 |
APFloat FVal(0.0); |
| 3632 |
if (ConstantFPSDNode *CN = dyn_cast(N)) |
3632 |
if (ConstantFPSDNode *CN = dyn_cast(N)) |
| 3633 |
FVal = CN->getValueAPF(); |
3633 |
FVal = CN->getValueAPF(); |
| 3634 |
else if (LoadSDNode *LN = dyn_cast(N)) { |
3634 |
else if (LoadSDNode *LN = dyn_cast(N)) { |
| 3635 |
// Some otherwise illegal constants are allowed in this case. |
3635 |
// Some otherwise illegal constants are allowed in this case. |
| 3636 |
if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || |
3636 |
if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || |
| 3637 |
!isa(LN->getOperand(1)->getOperand(1))) |
3637 |
!isa(LN->getOperand(1)->getOperand(1))) |
| 3638 |
return false; |
3638 |
return false; |
| 3639 |
|
3639 |
|
| 3640 |
ConstantPoolSDNode *CN = |
3640 |
ConstantPoolSDNode *CN = |
| 3641 |
dyn_cast(LN->getOperand(1)->getOperand(1)); |
3641 |
dyn_cast(LN->getOperand(1)->getOperand(1)); |
| 3642 |
FVal = cast(CN->getConstVal())->getValueAPF(); |
3642 |
FVal = cast(CN->getConstVal())->getValueAPF(); |
| 3643 |
} else |
3643 |
} else |
| 3644 |
return false; |
3644 |
return false; |
| 3645 |
|
3645 |
|
| 3646 |
// An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits |
3646 |
// An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits |
| 3647 |
// is between 1 and 32 for a destination w-register, or 1 and 64 for an |
3647 |
// is between 1 and 32 for a destination w-register, or 1 and 64 for an |
| 3648 |
// x-register. |
3648 |
// x-register. |
| 3649 |
// |
3649 |
// |
| 3650 |
// By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we |
3650 |
// By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we |
| 3651 |
// want THIS_NODE to be 2^fbits. This is much easier to deal with using |
3651 |
// want THIS_NODE to be 2^fbits. This is much easier to deal with using |
| 3652 |
// integers. |
3652 |
// integers. |
| 3653 |
bool IsExact; |
3653 |
bool IsExact; |
| 3654 |
|
3654 |
|
| 3655 |
// fbits is between 1 and 64 in the worst-case, which means the fmul |
3655 |
// fbits is between 1 and 64 in the worst-case, which means the fmul |
| 3656 |
// could have 2^64 as an actual operand. Need 65 bits of precision. |
3656 |
// could have 2^64 as an actual operand. Need 65 bits of precision. |
| 3657 |
APSInt IntVal(65, true); |
3657 |
APSInt IntVal(65, true); |
| 3658 |
FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); |
3658 |
FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); |
| 3659 |
|
3659 |
|
| 3660 |
// N.b. isPowerOf2 also checks for > 0. |
3660 |
// N.b. isPowerOf2 also checks for > 0. |
| 3661 |
if (!IsExact || !IntVal.isPowerOf2()) return false; |
3661 |
if (!IsExact || !IntVal.isPowerOf2()) return false; |
| 3662 |
unsigned FBits = IntVal.logBase2(); |
3662 |
unsigned FBits = IntVal.logBase2(); |
| 3663 |
|
3663 |
|
| 3664 |
// Checks above should have guaranteed that we haven't lost information in |
3664 |
// Checks above should have guaranteed that we haven't lost information in |
| 3665 |
// finding FBits, but it must still be in range. |
3665 |
// finding FBits, but it must still be in range. |
| 3666 |
if (FBits == 0 || FBits > RegWidth) return false; |
3666 |
if (FBits == 0 || FBits > RegWidth) return false; |
| 3667 |
|
3667 |
|
| 3668 |
FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32); |
3668 |
FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32); |
| 3669 |
return true; |
3669 |
return true; |
| 3670 |
} |
3670 |
} |
| 3671 |
|
3671 |
|
| 3672 |
// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields |
3672 |
// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields |
| 3673 |
// of the string and obtains the integer values from them and combines these |
3673 |
// of the string and obtains the integer values from them and combines these |
| 3674 |
// into a single value to be used in the MRS/MSR instruction. |
3674 |
// into a single value to be used in the MRS/MSR instruction. |
| 3675 |
static int getIntOperandFromRegisterString(StringRef RegString) { |
3675 |
static int getIntOperandFromRegisterString(StringRef RegString) { |
| 3676 |
SmallVector Fields; |
3676 |
SmallVector Fields; |
| 3677 |
RegString.split(Fields, ':'); |
3677 |
RegString.split(Fields, ':'); |
| 3678 |
|
3678 |
|
| 3679 |
if (Fields.size() == 1) |
3679 |
if (Fields.size() == 1) |
| 3680 |
return -1; |
3680 |
return -1; |
| 3681 |
|
3681 |
|
| 3682 |
assert(Fields.size() == 5 |
3682 |
assert(Fields.size() == 5 |
| 3683 |
&& "Invalid number of fields in read register string"); |
3683 |
&& "Invalid number of fields in read register string"); |
| 3684 |
|
3684 |
|
| 3685 |
SmallVector Ops; |
3685 |
SmallVector Ops; |
| 3686 |
bool AllIntFields = true; |
3686 |
bool AllIntFields = true; |
| 3687 |
|
3687 |
|
| 3688 |
for (StringRef Field : Fields) { |
3688 |
for (StringRef Field : Fields) { |
| 3689 |
unsigned IntField; |
3689 |
unsigned IntField; |
| 3690 |
AllIntFields &= !Field.getAsInteger(10, IntField); |
3690 |
AllIntFields &= !Field.getAsInteger(10, IntField); |
| 3691 |
Ops.push_back(IntField); |
3691 |
Ops.push_back(IntField); |
| 3692 |
} |
3692 |
} |
| 3693 |
|
3693 |
|
| 3694 |
assert(AllIntFields && |
3694 |
assert(AllIntFields && |
| 3695 |
"Unexpected non-integer value in special register string."); |
3695 |
"Unexpected non-integer value in special register string."); |
| 3696 |
(void)AllIntFields; |
3696 |
(void)AllIntFields; |
| 3697 |
|
3697 |
|
| 3698 |
// Need to combine the integer fields of the string into a single value |
3698 |
// Need to combine the integer fields of the string into a single value |
| 3699 |
// based on the bit encoding of MRS/MSR instruction. |
3699 |
// based on the bit encoding of MRS/MSR instruction. |
| 3700 |
return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | |
3700 |
return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | |
| 3701 |
(Ops[3] << 3) | (Ops[4]); |
3701 |
(Ops[3] << 3) | (Ops[4]); |
| 3702 |
} |
3702 |
} |
| 3703 |
|
3703 |
|
| 3704 |
// Lower the read_register intrinsic to an MRS instruction node if the special |
3704 |
// Lower the read_register intrinsic to an MRS instruction node if the special |
| 3705 |
// register string argument is either of the form detailed in the ALCE (the |
3705 |
// register string argument is either of the form detailed in the ALCE (the |
| 3706 |
// form described in getIntOperandsFromRegsterString) or is a named register |
3706 |
// form described in getIntOperandsFromRegsterString) or is a named register |
| 3707 |
// known by the MRS SysReg mapper. |
3707 |
// known by the MRS SysReg mapper. |
| 3708 |
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) { |
3708 |
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) { |
| 3709 |
const auto *MD = cast(N->getOperand(1)); |
3709 |
const auto *MD = cast(N->getOperand(1)); |
| 3710 |
const auto *RegString = cast(MD->getMD()->getOperand(0)); |
3710 |
const auto *RegString = cast(MD->getMD()->getOperand(0)); |
| 3711 |
SDLoc DL(N); |
3711 |
SDLoc DL(N); |
| 3712 |
|
3712 |
|
| 3713 |
bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS; |
3713 |
bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS; |
| 3714 |
|
3714 |
|
| 3715 |
unsigned Opcode64Bit = AArch64::MRS; |
3715 |
unsigned Opcode64Bit = AArch64::MRS; |
| 3716 |
int Imm = getIntOperandFromRegisterString(RegString->getString()); |
3716 |
int Imm = getIntOperandFromRegisterString(RegString->getString()); |
| 3717 |
if (Imm == -1) { |
3717 |
if (Imm == -1) { |
| 3718 |
// No match, Use the sysreg mapper to map the remaining possible strings to |
3718 |
// No match, Use the sysreg mapper to map the remaining possible strings to |
| 3719 |
// the value for the register to be used for the instruction operand. |
3719 |
// the value for the register to be used for the instruction operand. |
| 3720 |
const auto *TheReg = |
3720 |
const auto *TheReg = |
| 3721 |
AArch64SysReg::lookupSysRegByName(RegString->getString()); |
3721 |
AArch64SysReg::lookupSysRegByName(RegString->getString()); |
| 3722 |
if (TheReg && TheReg->Readable && |
3722 |
if (TheReg && TheReg->Readable && |
| 3723 |
TheReg->haveFeatures(Subtarget->getFeatureBits())) |
3723 |
TheReg->haveFeatures(Subtarget->getFeatureBits())) |
| 3724 |
Imm = TheReg->Encoding; |
3724 |
Imm = TheReg->Encoding; |
| 3725 |
else |
3725 |
else |
| 3726 |
Imm = AArch64SysReg::parseGenericRegister(RegString->getString()); |
3726 |
Imm = AArch64SysReg::parseGenericRegister(RegString->getString()); |
| 3727 |
|
3727 |
|
| 3728 |
if (Imm == -1) { |
3728 |
if (Imm == -1) { |
| 3729 |
// Still no match, see if this is "pc" or give up. |
3729 |
// Still no match, see if this is "pc" or give up. |
| 3730 |
if (!ReadIs128Bit && RegString->getString() == "pc") { |
3730 |
if (!ReadIs128Bit && RegString->getString() == "pc") { |
| 3731 |
Opcode64Bit = AArch64::ADR; |
3731 |
Opcode64Bit = AArch64::ADR; |
| 3732 |
Imm = 0; |
3732 |
Imm = 0; |
| 3733 |
} else { |
3733 |
} else { |
| 3734 |
return false; |
3734 |
return false; |
| 3735 |
} |
3735 |
} |
| 3736 |
} |
3736 |
} |
| 3737 |
} |
3737 |
} |
| 3738 |
|
3738 |
|
| 3739 |
SDValue InChain = N->getOperand(0); |
3739 |
SDValue InChain = N->getOperand(0); |
| 3740 |
SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32); |
3740 |
SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32); |
| 3741 |
if (!ReadIs128Bit) { |
3741 |
if (!ReadIs128Bit) { |
| 3742 |
CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */, |
3742 |
CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */, |
| 3743 |
{SysRegImm, InChain}); |
3743 |
{SysRegImm, InChain}); |
| 3744 |
} else { |
3744 |
} else { |
| 3745 |
SDNode *MRRS = CurDAG->getMachineNode( |
3745 |
SDNode *MRRS = CurDAG->getMachineNode( |
| 3746 |
AArch64::MRRS, DL, |
3746 |
AArch64::MRRS, DL, |
| 3747 |
{MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */}, |
3747 |
{MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */}, |
| 3748 |
{SysRegImm, InChain}); |
3748 |
{SysRegImm, InChain}); |
| 3749 |
|
3749 |
|
| 3750 |
// Sysregs are not endian. The even register always contains the low half |
3750 |
// Sysregs are not endian. The even register always contains the low half |
| 3751 |
// of the register. |
3751 |
// of the register. |
| 3752 |
SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64, |
3752 |
SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64, |
| 3753 |
SDValue(MRRS, 0)); |
3753 |
SDValue(MRRS, 0)); |
| 3754 |
SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64, |
3754 |
SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64, |
| 3755 |
SDValue(MRRS, 0)); |
3755 |
SDValue(MRRS, 0)); |
| 3756 |
SDValue OutChain = SDValue(MRRS, 1); |
3756 |
SDValue OutChain = SDValue(MRRS, 1); |
| 3757 |
|
3757 |
|
| 3758 |
ReplaceUses(SDValue(N, 0), Lo); |
3758 |
ReplaceUses(SDValue(N, 0), Lo); |
| 3759 |
ReplaceUses(SDValue(N, 1), Hi); |
3759 |
ReplaceUses(SDValue(N, 1), Hi); |
| 3760 |
ReplaceUses(SDValue(N, 2), OutChain); |
3760 |
ReplaceUses(SDValue(N, 2), OutChain); |
| 3761 |
}; |
3761 |
}; |
| 3762 |
return true; |
3762 |
return true; |
| 3763 |
} |
3763 |
} |
| 3764 |
|
3764 |
|
| 3765 |
// Lower the write_register intrinsic to an MSR instruction node if the special |
3765 |
// Lower the write_register intrinsic to an MSR instruction node if the special |
| 3766 |
// register string argument is either of the form detailed in the ALCE (the |
3766 |
// register string argument is either of the form detailed in the ALCE (the |
| 3767 |
// form described in getIntOperandsFromRegsterString) or is a named register |
3767 |
// form described in getIntOperandsFromRegsterString) or is a named register |
| 3768 |
// known by the MSR SysReg mapper. |
3768 |
// known by the MSR SysReg mapper. |
| 3769 |
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { |
3769 |
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { |
| 3770 |
const auto *MD = cast(N->getOperand(1)); |
3770 |
const auto *MD = cast(N->getOperand(1)); |
| 3771 |
const auto *RegString = cast(MD->getMD()->getOperand(0)); |
3771 |
const auto *RegString = cast(MD->getMD()->getOperand(0)); |
| 3772 |
SDLoc DL(N); |
3772 |
SDLoc DL(N); |
| 3773 |
|
3773 |
|
| 3774 |
bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR; |
3774 |
bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR; |
| 3775 |
|
3775 |
|
| 3776 |
if (!WriteIs128Bit) { |
3776 |
if (!WriteIs128Bit) { |
| 3777 |
// Check if the register was one of those allowed as the pstatefield value |
3777 |
// Check if the register was one of those allowed as the pstatefield value |
| 3778 |
// in the MSR (immediate) instruction. To accept the values allowed in the |
3778 |
// in the MSR (immediate) instruction. To accept the values allowed in the |
| 3779 |
// pstatefield for the MSR (immediate) instruction, we also require that an |
3779 |
// pstatefield for the MSR (immediate) instruction, we also require that an |
| 3780 |
// immediate value has been provided as an argument, we know that this is |
3780 |
// immediate value has been provided as an argument, we know that this is |
| 3781 |
// the case as it has been ensured by semantic checking. |
3781 |
// the case as it has been ensured by semantic checking. |
| 3782 |
auto trySelectPState = [&](auto PMapper, unsigned State) { |
3782 |
auto trySelectPState = [&](auto PMapper, unsigned State) { |
| 3783 |
if (PMapper) { |
3783 |
if (PMapper) { |
| 3784 |
assert(isa(N->getOperand(2)) && |
3784 |
assert(isa(N->getOperand(2)) && |
| 3785 |
"Expected a constant integer expression."); |
3785 |
"Expected a constant integer expression."); |
| 3786 |
unsigned Reg = PMapper->Encoding; |
3786 |
unsigned Reg = PMapper->Encoding; |
| 3787 |
uint64_t Immed = cast(N->getOperand(2))->getZExtValue(); |
3787 |
uint64_t Immed = cast(N->getOperand(2))->getZExtValue(); |
| 3788 |
CurDAG->SelectNodeTo( |
3788 |
CurDAG->SelectNodeTo( |
| 3789 |
N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), |
3789 |
N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), |
| 3790 |
CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0)); |
3790 |
CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0)); |
| 3791 |
return true; |
3791 |
return true; |
| 3792 |
} |
3792 |
} |
| 3793 |
return false; |
3793 |
return false; |
| 3794 |
}; |
3794 |
}; |
| 3795 |
|
3795 |
|
| 3796 |
if (trySelectPState( |
3796 |
if (trySelectPState( |
| 3797 |
AArch64PState::lookupPStateImm0_15ByName(RegString->getString()), |
3797 |
AArch64PState::lookupPStateImm0_15ByName(RegString->getString()), |
| 3798 |
AArch64::MSRpstateImm4)) |
3798 |
AArch64::MSRpstateImm4)) |
| 3799 |
return true; |
3799 |
return true; |
| 3800 |
if (trySelectPState( |
3800 |
if (trySelectPState( |
| 3801 |
AArch64PState::lookupPStateImm0_1ByName(RegString->getString()), |
3801 |
AArch64PState::lookupPStateImm0_1ByName(RegString->getString()), |
| 3802 |
AArch64::MSRpstateImm1)) |
3802 |
AArch64::MSRpstateImm1)) |
| 3803 |
return true; |
3803 |
return true; |
| 3804 |
} |
3804 |
} |
| 3805 |
|
3805 |
|
| 3806 |
int Imm = getIntOperandFromRegisterString(RegString->getString()); |
3806 |
int Imm = getIntOperandFromRegisterString(RegString->getString()); |
| 3807 |
if (Imm == -1) { |
3807 |
if (Imm == -1) { |
| 3808 |
// Use the sysreg mapper to attempt to map the remaining possible strings |
3808 |
// Use the sysreg mapper to attempt to map the remaining possible strings |
| 3809 |
// to the value for the register to be used for the MSR (register) |
3809 |
// to the value for the register to be used for the MSR (register) |
| 3810 |
// instruction operand. |
3810 |
// instruction operand. |
| 3811 |
auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); |
3811 |
auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); |
| 3812 |
if (TheReg && TheReg->Writeable && |
3812 |
if (TheReg && TheReg->Writeable && |
| 3813 |
TheReg->haveFeatures(Subtarget->getFeatureBits())) |
3813 |
TheReg->haveFeatures(Subtarget->getFeatureBits())) |
| 3814 |
Imm = TheReg->Encoding; |
3814 |
Imm = TheReg->Encoding; |
| 3815 |
else |
3815 |
else |
| 3816 |
Imm = AArch64SysReg::parseGenericRegister(RegString->getString()); |
3816 |
Imm = AArch64SysReg::parseGenericRegister(RegString->getString()); |
| 3817 |
|
3817 |
|
| 3818 |
if (Imm == -1) |
3818 |
if (Imm == -1) |
| 3819 |
return false; |
3819 |
return false; |
| 3820 |
} |
3820 |
} |
| 3821 |
|
3821 |
|
| 3822 |
SDValue InChain = N->getOperand(0); |
3822 |
SDValue InChain = N->getOperand(0); |
| 3823 |
if (!WriteIs128Bit) { |
3823 |
if (!WriteIs128Bit) { |
| 3824 |
CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other, |
3824 |
CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other, |
| 3825 |
CurDAG->getTargetConstant(Imm, DL, MVT::i32), |
3825 |
CurDAG->getTargetConstant(Imm, DL, MVT::i32), |
| 3826 |
N->getOperand(2), InChain); |
3826 |
N->getOperand(2), InChain); |
| 3827 |
} else { |
3827 |
} else { |
| 3828 |
// No endian swap. The lower half always goes into the even subreg, and the |
3828 |
// No endian swap. The lower half always goes into the even subreg, and the |
| 3829 |
// higher half always into the odd supreg. |
3829 |
// higher half always into the odd supreg. |
| 3830 |
SDNode *Pair = CurDAG->getMachineNode( |
3830 |
SDNode *Pair = CurDAG->getMachineNode( |
| 3831 |
TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */, |
3831 |
TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */, |
| 3832 |
{CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL, |
3832 |
{CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL, |
| 3833 |
MVT::i32), |
3833 |
MVT::i32), |
| 3834 |
N->getOperand(2), |
3834 |
N->getOperand(2), |
| 3835 |
CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32), |
3835 |
CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32), |
| 3836 |
N->getOperand(3), |
3836 |
N->getOperand(3), |
| 3837 |
CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)}); |
3837 |
CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)}); |
| 3838 |
|
3838 |
|
| 3839 |
CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other, |
3839 |
CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other, |
| 3840 |
CurDAG->getTargetConstant(Imm, DL, MVT::i32), |
3840 |
CurDAG->getTargetConstant(Imm, DL, MVT::i32), |
| 3841 |
SDValue(Pair, 0), InChain); |
3841 |
SDValue(Pair, 0), InChain); |
| 3842 |
} |
3842 |
} |
| 3843 |
|
3843 |
|
| 3844 |
return true; |
3844 |
return true; |
| 3845 |
} |
3845 |
} |
| 3846 |
|
3846 |
|
| 3847 |
/// We've got special pseudo-instructions for these |
3847 |
/// We've got special pseudo-instructions for these |
| 3848 |
bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { |
3848 |
bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { |
| 3849 |
unsigned Opcode; |
3849 |
unsigned Opcode; |
| 3850 |
EVT MemTy = cast(N)->getMemoryVT(); |
3850 |
EVT MemTy = cast(N)->getMemoryVT(); |
| 3851 |
|
3851 |
|
| 3852 |
// Leave IR for LSE if subtarget supports it. |
3852 |
// Leave IR for LSE if subtarget supports it. |
| 3853 |
if (Subtarget->hasLSE()) return false; |
3853 |
if (Subtarget->hasLSE()) return false; |
| 3854 |
|
3854 |
|
| 3855 |
if (MemTy == MVT::i8) |
3855 |
if (MemTy == MVT::i8) |
| 3856 |
Opcode = AArch64::CMP_SWAP_8; |
3856 |
Opcode = AArch64::CMP_SWAP_8; |
| 3857 |
else if (MemTy == MVT::i16) |
3857 |
else if (MemTy == MVT::i16) |
| 3858 |
Opcode = AArch64::CMP_SWAP_16; |
3858 |
Opcode = AArch64::CMP_SWAP_16; |
| 3859 |
else if (MemTy == MVT::i32) |
3859 |
else if (MemTy == MVT::i32) |
| 3860 |
Opcode = AArch64::CMP_SWAP_32; |
3860 |
Opcode = AArch64::CMP_SWAP_32; |
| 3861 |
else if (MemTy == MVT::i64) |
3861 |
else if (MemTy == MVT::i64) |
| 3862 |
Opcode = AArch64::CMP_SWAP_64; |
3862 |
Opcode = AArch64::CMP_SWAP_64; |
| 3863 |
else |
3863 |
else |
| 3864 |
llvm_unreachable("Unknown AtomicCmpSwap type"); |
3864 |
llvm_unreachable("Unknown AtomicCmpSwap type"); |
| 3865 |
|
3865 |
|
| 3866 |
MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32; |
3866 |
MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32; |
| 3867 |
SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), |
3867 |
SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), |
| 3868 |
N->getOperand(0)}; |
3868 |
N->getOperand(0)}; |
| 3869 |
SDNode *CmpSwap = CurDAG->getMachineNode( |
3869 |
SDNode *CmpSwap = CurDAG->getMachineNode( |
| 3870 |
Opcode, SDLoc(N), |
3870 |
Opcode, SDLoc(N), |
| 3871 |
CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); |
3871 |
CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); |
| 3872 |
|
3872 |
|
| 3873 |
MachineMemOperand *MemOp = cast(N)->getMemOperand(); |
3873 |
MachineMemOperand *MemOp = cast(N)->getMemOperand(); |
| 3874 |
CurDAG->setNodeMemRefs(cast(CmpSwap), {MemOp}); |
3874 |
CurDAG->setNodeMemRefs(cast(CmpSwap), {MemOp}); |
| 3875 |
|
3875 |
|
| 3876 |
ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); |
3876 |
ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); |
| 3877 |
ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); |
3877 |
ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); |
| 3878 |
CurDAG->RemoveDeadNode(N); |
3878 |
CurDAG->RemoveDeadNode(N); |
| 3879 |
|
3879 |
|
| 3880 |
return true; |
3880 |
return true; |
| 3881 |
} |
3881 |
} |
| 3882 |
|
3882 |
|
| 3883 |
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, |
3883 |
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, |
| 3884 |
SDValue &Shift) { |
3884 |
SDValue &Shift) { |
| 3885 |
if (!isa(N)) |
3885 |
if (!isa(N)) |
| 3886 |
return false; |
3886 |
return false; |
| 3887 |
|
3887 |
|
| 3888 |
SDLoc DL(N); |
3888 |
SDLoc DL(N); |
| 3889 |
uint64_t Val = cast(N) |
3889 |
uint64_t Val = cast(N) |
| 3890 |
->getAPIntValue() |
3890 |
->getAPIntValue() |
| 3891 |
.trunc(VT.getFixedSizeInBits()) |
3891 |
.trunc(VT.getFixedSizeInBits()) |
| 3892 |
.getZExtValue(); |
3892 |
.getZExtValue(); |
| 3893 |
|
3893 |
|
| 3894 |
switch (VT.SimpleTy) { |
3894 |
switch (VT.SimpleTy) { |
| 3895 |
case MVT::i8: |
3895 |
case MVT::i8: |
| 3896 |
// All immediates are supported. |
3896 |
// All immediates are supported. |
| 3897 |
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
3897 |
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
| 3898 |
Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); |
3898 |
Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); |
| 3899 |
return true; |
3899 |
return true; |
| 3900 |
case MVT::i16: |
3900 |
case MVT::i16: |
| 3901 |
case MVT::i32: |
3901 |
case MVT::i32: |
| 3902 |
case MVT::i64: |
3902 |
case MVT::i64: |
| 3903 |
// Support 8bit unsigned immediates. |
3903 |
// Support 8bit unsigned immediates. |
| 3904 |
if (Val <= 255) { |
3904 |
if (Val <= 255) { |
| 3905 |
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
3905 |
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
| 3906 |
Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); |
3906 |
Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); |
| 3907 |
return true; |
3907 |
return true; |
| 3908 |
} |
3908 |
} |
| 3909 |
// Support 16bit unsigned immediates that are a multiple of 256. |
3909 |
// Support 16bit unsigned immediates that are a multiple of 256. |
| 3910 |
if (Val <= 65280 && Val % 256 == 0) { |
3910 |
if (Val <= 65280 && Val % 256 == 0) { |
| 3911 |
Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); |
3911 |
Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); |
| 3912 |
Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); |
3912 |
Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); |
| 3913 |
return true; |
3913 |
return true; |
| 3914 |
} |
3914 |
} |
| 3915 |
break; |
3915 |
break; |
| 3916 |
default: |
3916 |
default: |
| 3917 |
break; |
3917 |
break; |
| 3918 |
} |
3918 |
} |
| 3919 |
|
3919 |
|
| 3920 |
return false; |
3920 |
return false; |
| 3921 |
} |
3921 |
} |
| 3922 |
|
3922 |
|
| 3923 |
bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, |
3923 |
bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, |
| 3924 |
SDValue &Shift) { |
3924 |
SDValue &Shift) { |
| 3925 |
if (!isa(N)) |
3925 |
if (!isa(N)) |
| 3926 |
return false; |
3926 |
return false; |
| 3927 |
|
3927 |
|
| 3928 |
SDLoc DL(N); |
3928 |
SDLoc DL(N); |
| 3929 |
int64_t Val = cast(N) |
3929 |
int64_t Val = cast(N) |
| 3930 |
->getAPIntValue() |
3930 |
->getAPIntValue() |
| 3931 |
.trunc(VT.getFixedSizeInBits()) |
3931 |
.trunc(VT.getFixedSizeInBits()) |
| 3932 |
.getSExtValue(); |
3932 |
.getSExtValue(); |
| 3933 |
|
3933 |
|
| 3934 |
switch (VT.SimpleTy) { |
3934 |
switch (VT.SimpleTy) { |
| 3935 |
case MVT::i8: |
3935 |
case MVT::i8: |
| 3936 |
// All immediates are supported. |
3936 |
// All immediates are supported. |
| 3937 |
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
3937 |
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
| 3938 |
Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); |
3938 |
Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); |
| 3939 |
return true; |
3939 |
return true; |
| 3940 |
case MVT::i16: |
3940 |
case MVT::i16: |
| 3941 |
case MVT::i32: |
3941 |
case MVT::i32: |
| 3942 |
case MVT::i64: |
3942 |
case MVT::i64: |
| 3943 |
// Support 8bit signed immediates. |
3943 |
// Support 8bit signed immediates. |
| 3944 |
if (Val >= -128 && Val <= 127) { |
3944 |
if (Val >= -128 && Val <= 127) { |
| 3945 |
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
3945 |
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); |
| 3946 |
Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); |
3946 |
Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); |
| 3947 |
return true; |
3947 |
return true; |
| 3948 |
} |
3948 |
} |
| 3949 |
// Support 16bit signed immediates that are a multiple of 256. |
3949 |
// Support 16bit signed immediates that are a multiple of 256. |
| 3950 |
if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { |
3950 |
if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { |
| 3951 |
Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); |
3951 |
Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); |
| 3952 |
Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); |
3952 |
Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); |
| 3953 |
return true; |
3953 |
return true; |
| 3954 |
} |
3954 |
} |
| 3955 |
break; |
3955 |
break; |
| 3956 |
default: |
3956 |
default: |
| 3957 |
break; |
3957 |
break; |
| 3958 |
} |
3958 |
} |
| 3959 |
|
3959 |
|
| 3960 |
return false; |
3960 |
return false; |
| 3961 |
} |
3961 |
} |
| 3962 |
|
3962 |
|
| 3963 |
bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { |
3963 |
bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { |
| 3964 |
if (auto CNode = dyn_cast(N)) { |
3964 |
if (auto CNode = dyn_cast(N)) { |
| 3965 |
int64_t ImmVal = CNode->getSExtValue(); |
3965 |
int64_t ImmVal = CNode->getSExtValue(); |
| 3966 |
SDLoc DL(N); |
3966 |
SDLoc DL(N); |
| 3967 |
if (ImmVal >= -128 && ImmVal < 128) { |
3967 |
if (ImmVal >= -128 && ImmVal < 128) { |
| 3968 |
Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); |
3968 |
Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); |
| 3969 |
return true; |
3969 |
return true; |
| 3970 |
} |
3970 |
} |
| 3971 |
} |
3971 |
} |
| 3972 |
return false; |
3972 |
return false; |
| 3973 |
} |
3973 |
} |
| 3974 |
|
3974 |
|
| 3975 |
bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { |
3975 |
bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { |
| 3976 |
if (auto CNode = dyn_cast(N)) { |
3976 |
if (auto CNode = dyn_cast(N)) { |
| 3977 |
uint64_t ImmVal = CNode->getZExtValue(); |
3977 |
uint64_t ImmVal = CNode->getZExtValue(); |
| 3978 |
|
3978 |
|
| 3979 |
switch (VT.SimpleTy) { |
3979 |
switch (VT.SimpleTy) { |
| 3980 |
case MVT::i8: |
3980 |
case MVT::i8: |
| 3981 |
ImmVal &= 0xFF; |
3981 |
ImmVal &= 0xFF; |
| 3982 |
break; |
3982 |
break; |
| 3983 |
case MVT::i16: |
3983 |
case MVT::i16: |
| 3984 |
ImmVal &= 0xFFFF; |
3984 |
ImmVal &= 0xFFFF; |
| 3985 |
break; |
3985 |
break; |
| 3986 |
case MVT::i32: |
3986 |
case MVT::i32: |
| 3987 |
ImmVal &= 0xFFFFFFFF; |
3987 |
ImmVal &= 0xFFFFFFFF; |
| 3988 |
break; |
3988 |
break; |
| 3989 |
case MVT::i64: |
3989 |
case MVT::i64: |
| 3990 |
break; |
3990 |
break; |
| 3991 |
default: |
3991 |
default: |
| 3992 |
llvm_unreachable("Unexpected type"); |
3992 |
llvm_unreachable("Unexpected type"); |
| 3993 |
} |
3993 |
} |
| 3994 |
|
3994 |
|
| 3995 |
if (ImmVal < 256) { |
3995 |
if (ImmVal < 256) { |
| 3996 |
Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); |
3996 |
Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); |
| 3997 |
return true; |
3997 |
return true; |
| 3998 |
} |
3998 |
} |
| 3999 |
} |
3999 |
} |
| 4000 |
return false; |
4000 |
return false; |
| 4001 |
} |
4001 |
} |
| 4002 |
|
4002 |
|
| 4003 |
bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, |
4003 |
bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, |
| 4004 |
bool Invert) { |
4004 |
bool Invert) { |
| 4005 |
if (auto CNode = dyn_cast(N)) { |
4005 |
if (auto CNode = dyn_cast(N)) { |
| 4006 |
uint64_t ImmVal = CNode->getZExtValue(); |
4006 |
uint64_t ImmVal = CNode->getZExtValue(); |
| 4007 |
SDLoc DL(N); |
4007 |
SDLoc DL(N); |
| 4008 |
|
4008 |
|
| 4009 |
if (Invert) |
4009 |
if (Invert) |
| 4010 |
ImmVal = ~ImmVal; |
4010 |
ImmVal = ~ImmVal; |
| 4011 |
|
4011 |
|
| 4012 |
// Shift mask depending on type size. |
4012 |
// Shift mask depending on type size. |
| 4013 |
switch (VT.SimpleTy) { |
4013 |
switch (VT.SimpleTy) { |
| 4014 |
case MVT::i8: |
4014 |
case MVT::i8: |
| 4015 |
ImmVal &= 0xFF; |
4015 |
ImmVal &= 0xFF; |
| 4016 |
ImmVal |= ImmVal << 8; |
4016 |
ImmVal |= ImmVal << 8; |
| 4017 |
ImmVal |= ImmVal << 16; |
4017 |
ImmVal |= ImmVal << 16; |
| 4018 |
ImmVal |= ImmVal << 32; |
4018 |
ImmVal |= ImmVal << 32; |
| 4019 |
break; |
4019 |
break; |
| 4020 |
case MVT::i16: |
4020 |
case MVT::i16: |
| 4021 |
ImmVal &= 0xFFFF; |
4021 |
ImmVal &= 0xFFFF; |
| 4022 |
ImmVal |= ImmVal << 16; |
4022 |
ImmVal |= ImmVal << 16; |
| 4023 |
ImmVal |= ImmVal << 32; |
4023 |
ImmVal |= ImmVal << 32; |
| 4024 |
break; |
4024 |
break; |
| 4025 |
case MVT::i32: |
4025 |
case MVT::i32: |
| 4026 |
ImmVal &= 0xFFFFFFFF; |
4026 |
ImmVal &= 0xFFFFFFFF; |
| 4027 |
ImmVal |= ImmVal << 32; |
4027 |
ImmVal |= ImmVal << 32; |
| 4028 |
break; |
4028 |
break; |
| 4029 |
case MVT::i64: |
4029 |
case MVT::i64: |
| 4030 |
break; |
4030 |
break; |
| 4031 |
default: |
4031 |
default: |
| 4032 |
llvm_unreachable("Unexpected type"); |
4032 |
llvm_unreachable("Unexpected type"); |
| 4033 |
} |
4033 |
} |
| 4034 |
|
4034 |
|
| 4035 |
uint64_t encoding; |
4035 |
uint64_t encoding; |
| 4036 |
if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { |
4036 |
if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { |
| 4037 |
Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); |
4037 |
Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); |
| 4038 |
return true; |
4038 |
return true; |
| 4039 |
} |
4039 |
} |
| 4040 |
} |
4040 |
} |
| 4041 |
return false; |
4041 |
return false; |
| 4042 |
} |
4042 |
} |
| 4043 |
|
4043 |
|
| 4044 |
// SVE shift intrinsics allow shift amounts larger than the element's bitwidth. |
4044 |
// SVE shift intrinsics allow shift amounts larger than the element's bitwidth. |
| 4045 |
// Rather than attempt to normalise everything we can sometimes saturate the |
4045 |
// Rather than attempt to normalise everything we can sometimes saturate the |
| 4046 |
// shift amount during selection. This function also allows for consistent |
4046 |
// shift amount during selection. This function also allows for consistent |
| 4047 |
// isel patterns by ensuring the resulting "Imm" node is of the i32 type |
4047 |
// isel patterns by ensuring the resulting "Imm" node is of the i32 type |
| 4048 |
// required by the instructions. |
4048 |
// required by the instructions. |
| 4049 |
bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, |
4049 |
bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, |
| 4050 |
uint64_t High, bool AllowSaturation, |
4050 |
uint64_t High, bool AllowSaturation, |
| 4051 |
SDValue &Imm) { |
4051 |
SDValue &Imm) { |
| 4052 |
if (auto *CN = dyn_cast(N)) { |
4052 |
if (auto *CN = dyn_cast(N)) { |
| 4053 |
uint64_t ImmVal = CN->getZExtValue(); |
4053 |
uint64_t ImmVal = CN->getZExtValue(); |
| 4054 |
|
4054 |
|
| 4055 |
// Reject shift amounts that are too small. |
4055 |
// Reject shift amounts that are too small. |
| 4056 |
if (ImmVal < Low) |
4056 |
if (ImmVal < Low) |
| 4057 |
return false; |
4057 |
return false; |
| 4058 |
|
4058 |
|
| 4059 |
// Reject or saturate shift amounts that are too big. |
4059 |
// Reject or saturate shift amounts that are too big. |
| 4060 |
if (ImmVal > High) { |
4060 |
if (ImmVal > High) { |
| 4061 |
if (!AllowSaturation) |
4061 |
if (!AllowSaturation) |
| 4062 |
return false; |
4062 |
return false; |
| 4063 |
ImmVal = High; |
4063 |
ImmVal = High; |
| 4064 |
} |
4064 |
} |
| 4065 |
|
4065 |
|
| 4066 |
Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); |
4066 |
Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); |
| 4067 |
return true; |
4067 |
return true; |
| 4068 |
} |
4068 |
} |
| 4069 |
|
4069 |
|
| 4070 |
return false; |
4070 |
return false; |
| 4071 |
} |
4071 |
} |
| 4072 |
|
4072 |
|
| 4073 |
bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { |
4073 |
bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { |
| 4074 |
// tagp(FrameIndex, IRGstack, tag_offset): |
4074 |
// tagp(FrameIndex, IRGstack, tag_offset): |
| 4075 |
// since the offset between FrameIndex and IRGstack is a compile-time |
4075 |
// since the offset between FrameIndex and IRGstack is a compile-time |
| 4076 |
// constant, this can be lowered to a single ADDG instruction. |
4076 |
// constant, this can be lowered to a single ADDG instruction. |
| 4077 |
if (!(isa(N->getOperand(1)))) { |
4077 |
if (!(isa(N->getOperand(1)))) { |
| 4078 |
return false; |
4078 |
return false; |
| 4079 |
} |
4079 |
} |
| 4080 |
|
4080 |
|
| 4081 |
SDValue IRG_SP = N->getOperand(2); |
4081 |
SDValue IRG_SP = N->getOperand(2); |
| 4082 |
if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || |
4082 |
if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || |
| 4083 |
cast(IRG_SP->getOperand(1))->getZExtValue() != |
4083 |
cast(IRG_SP->getOperand(1))->getZExtValue() != |
| 4084 |
Intrinsic::aarch64_irg_sp) { |
4084 |
Intrinsic::aarch64_irg_sp) { |
| 4085 |
return false; |
4085 |
return false; |
| 4086 |
} |
4086 |
} |
| 4087 |
|
4087 |
|
| 4088 |
const TargetLowering *TLI = getTargetLowering(); |
4088 |
const TargetLowering *TLI = getTargetLowering(); |
| 4089 |
SDLoc DL(N); |
4089 |
SDLoc DL(N); |
| 4090 |
int FI = cast(N->getOperand(1))->getIndex(); |
4090 |
int FI = cast(N->getOperand(1))->getIndex(); |
| 4091 |
SDValue FiOp = CurDAG->getTargetFrameIndex( |
4091 |
SDValue FiOp = CurDAG->getTargetFrameIndex( |
| 4092 |
FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
4092 |
FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| 4093 |
int TagOffset = cast(N->getOperand(3))->getZExtValue(); |
4093 |
int TagOffset = cast(N->getOperand(3))->getZExtValue(); |
| 4094 |
|
4094 |
|
| 4095 |
SDNode *Out = CurDAG->getMachineNode( |
4095 |
SDNode *Out = CurDAG->getMachineNode( |
| 4096 |
AArch64::TAGPstack, DL, MVT::i64, |
4096 |
AArch64::TAGPstack, DL, MVT::i64, |
| 4097 |
{FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), |
4097 |
{FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), |
| 4098 |
CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); |
4098 |
CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); |
| 4099 |
ReplaceNode(N, Out); |
4099 |
ReplaceNode(N, Out); |
| 4100 |
return true; |
4100 |
return true; |
| 4101 |
} |
4101 |
} |
| 4102 |
|
4102 |
|
| 4103 |
void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { |
4103 |
void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { |
| 4104 |
assert(isa(N->getOperand(3)) && |
4104 |
assert(isa(N->getOperand(3)) && |
| 4105 |
"llvm.aarch64.tagp third argument must be an immediate"); |
4105 |
"llvm.aarch64.tagp third argument must be an immediate"); |
| 4106 |
if (trySelectStackSlotTagP(N)) |
4106 |
if (trySelectStackSlotTagP(N)) |
| 4107 |
return; |
4107 |
return; |
| 4108 |
// FIXME: above applies in any case when offset between Op1 and Op2 is a |
4108 |
// FIXME: above applies in any case when offset between Op1 and Op2 is a |
| 4109 |
// compile-time constant, not just for stack allocations. |
4109 |
// compile-time constant, not just for stack allocations. |
| 4110 |
|
4110 |
|
| 4111 |
// General case for unrelated pointers in Op1 and Op2. |
4111 |
// General case for unrelated pointers in Op1 and Op2. |
| 4112 |
SDLoc DL(N); |
4112 |
SDLoc DL(N); |
| 4113 |
int TagOffset = cast(N->getOperand(3))->getZExtValue(); |
4113 |
int TagOffset = cast(N->getOperand(3))->getZExtValue(); |
| 4114 |
SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, |
4114 |
SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, |
| 4115 |
{N->getOperand(1), N->getOperand(2)}); |
4115 |
{N->getOperand(1), N->getOperand(2)}); |
| 4116 |
SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, |
4116 |
SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, |
| 4117 |
{SDValue(N1, 0), N->getOperand(2)}); |
4117 |
{SDValue(N1, 0), N->getOperand(2)}); |
| 4118 |
SDNode *N3 = CurDAG->getMachineNode( |
4118 |
SDNode *N3 = CurDAG->getMachineNode( |
| 4119 |
AArch64::ADDG, DL, MVT::i64, |
4119 |
AArch64::ADDG, DL, MVT::i64, |
| 4120 |
{SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), |
4120 |
{SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), |
| 4121 |
CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); |
4121 |
CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); |
| 4122 |
ReplaceNode(N, N3); |
4122 |
ReplaceNode(N, N3); |
| 4123 |
} |
4123 |
} |
| 4124 |
|
4124 |
|
| 4125 |
bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) { |
4125 |
bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) { |
| 4126 |
assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!"); |
4126 |
assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!"); |
| 4127 |
|
4127 |
|
| 4128 |
// Bail when not a "cast" like insert_subvector. |
4128 |
// Bail when not a "cast" like insert_subvector. |
| 4129 |
if (cast(N->getOperand(2))->getZExtValue() != 0) |
4129 |
if (cast(N->getOperand(2))->getZExtValue() != 0) |
| 4130 |
return false; |
4130 |
return false; |
| 4131 |
if (!N->getOperand(0).isUndef()) |
4131 |
if (!N->getOperand(0).isUndef()) |
| 4132 |
return false; |
4132 |
return false; |
| 4133 |
|
4133 |
|
| 4134 |
// Bail when normal isel should do the job. |
4134 |
// Bail when normal isel should do the job. |
| 4135 |
EVT VT = N->getValueType(0); |
4135 |
EVT VT = N->getValueType(0); |
| 4136 |
EVT InVT = N->getOperand(1).getValueType(); |
4136 |
EVT InVT = N->getOperand(1).getValueType(); |
| 4137 |
if (VT.isFixedLengthVector() || InVT.isScalableVector()) |
4137 |
if (VT.isFixedLengthVector() || InVT.isScalableVector()) |
| 4138 |
return false; |
4138 |
return false; |
| 4139 |
if (InVT.getSizeInBits() <= 128) |
4139 |
if (InVT.getSizeInBits() <= 128) |
| 4140 |
return false; |
4140 |
return false; |
| 4141 |
|
4141 |
|
| 4142 |
// NOTE: We can only get here when doing fixed length SVE code generation. |
4142 |
// NOTE: We can only get here when doing fixed length SVE code generation. |
| 4143 |
// We do manual selection because the types involved are not linked to real |
4143 |
// We do manual selection because the types involved are not linked to real |
| 4144 |
// registers (despite being legal) and must be coerced into SVE registers. |
4144 |
// registers (despite being legal) and must be coerced into SVE registers. |
| 4145 |
|
4145 |
|
| 4146 |
assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && |
4146 |
assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && |
| 4147 |
"Expected to insert into a packed scalable vector!"); |
4147 |
"Expected to insert into a packed scalable vector!"); |
| 4148 |
|
4148 |
|
| 4149 |
SDLoc DL(N); |
4149 |
SDLoc DL(N); |
| 4150 |
auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); |
4150 |
auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); |
| 4151 |
ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, |
4151 |
ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, |
| 4152 |
N->getOperand(1), RC)); |
4152 |
N->getOperand(1), RC)); |
| 4153 |
return true; |
4153 |
return true; |
| 4154 |
} |
4154 |
} |
| 4155 |
|
4155 |
|
| 4156 |
bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) { |
4156 |
bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) { |
| 4157 |
assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!"); |
4157 |
assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!"); |
| 4158 |
|
4158 |
|
| 4159 |
// Bail when not a "cast" like extract_subvector. |
4159 |
// Bail when not a "cast" like extract_subvector. |
| 4160 |
if (cast(N->getOperand(1))->getZExtValue() != 0) |
4160 |
if (cast(N->getOperand(1))->getZExtValue() != 0) |
| 4161 |
return false; |
4161 |
return false; |
| 4162 |
|
4162 |
|
| 4163 |
// Bail when normal isel can do the job. |
4163 |
// Bail when normal isel can do the job. |
| 4164 |
EVT VT = N->getValueType(0); |
4164 |
EVT VT = N->getValueType(0); |
| 4165 |
EVT InVT = N->getOperand(0).getValueType(); |
4165 |
EVT InVT = N->getOperand(0).getValueType(); |
| 4166 |
if (VT.isScalableVector() || InVT.isFixedLengthVector()) |
4166 |
if (VT.isScalableVector() || InVT.isFixedLengthVector()) |
| 4167 |
return false; |
4167 |
return false; |
| 4168 |
if (VT.getSizeInBits() <= 128) |
4168 |
if (VT.getSizeInBits() <= 128) |
| 4169 |
return false; |
4169 |
return false; |
| 4170 |
|
4170 |
|
| 4171 |
// NOTE: We can only get here when doing fixed length SVE code generation. |
4171 |
// NOTE: We can only get here when doing fixed length SVE code generation. |
| 4172 |
// We do manual selection because the types involved are not linked to real |
4172 |
// We do manual selection because the types involved are not linked to real |
| 4173 |
// registers (despite being legal) and must be coerced into SVE registers. |
4173 |
// registers (despite being legal) and must be coerced into SVE registers. |
| 4174 |
|
4174 |
|
| 4175 |
assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && |
4175 |
assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock && |
| 4176 |
"Expected to extract from a packed scalable vector!"); |
4176 |
"Expected to extract from a packed scalable vector!"); |
| 4177 |
|
4177 |
|
| 4178 |
SDLoc DL(N); |
4178 |
SDLoc DL(N); |
| 4179 |
auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); |
4179 |
auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); |
| 4180 |
ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, |
4180 |
ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, |
| 4181 |
N->getOperand(0), RC)); |
4181 |
N->getOperand(0), RC)); |
| 4182 |
return true; |
4182 |
return true; |
| 4183 |
} |
4183 |
} |
| 4184 |
|
4184 |
|
| 4185 |
void AArch64DAGToDAGISel::Select(SDNode *Node) { |
4185 |
void AArch64DAGToDAGISel::Select(SDNode *Node) { |
| 4186 |
// If we have a custom node, we already have selected! |
4186 |
// If we have a custom node, we already have selected! |
| 4187 |
if (Node->isMachineOpcode()) { |
4187 |
if (Node->isMachineOpcode()) { |
| 4188 |
LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); |
4188 |
LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); |
| 4189 |
Node->setNodeId(-1); |
4189 |
Node->setNodeId(-1); |
| 4190 |
return; |
4190 |
return; |
| 4191 |
} |
4191 |
} |
| 4192 |
|
4192 |
|
| 4193 |
// Few custom selection stuff. |
4193 |
// Few custom selection stuff. |
| 4194 |
EVT VT = Node->getValueType(0); |
4194 |
EVT VT = Node->getValueType(0); |
| 4195 |
|
4195 |
|
| 4196 |
switch (Node->getOpcode()) { |
4196 |
switch (Node->getOpcode()) { |
| 4197 |
default: |
4197 |
default: |
| 4198 |
break; |
4198 |
break; |
| 4199 |
|
4199 |
|
| 4200 |
case ISD::ATOMIC_CMP_SWAP: |
4200 |
case ISD::ATOMIC_CMP_SWAP: |
| 4201 |
if (SelectCMP_SWAP(Node)) |
4201 |
if (SelectCMP_SWAP(Node)) |
| 4202 |
return; |
4202 |
return; |
| 4203 |
break; |
4203 |
break; |
| 4204 |
|
4204 |
|
| 4205 |
case ISD::READ_REGISTER: |
4205 |
case ISD::READ_REGISTER: |
| 4206 |
case AArch64ISD::MRRS: |
4206 |
case AArch64ISD::MRRS: |
| 4207 |
if (tryReadRegister(Node)) |
4207 |
if (tryReadRegister(Node)) |
| 4208 |
return; |
4208 |
return; |
| 4209 |
break; |
4209 |
break; |
| 4210 |
|
4210 |
|
| 4211 |
case ISD::WRITE_REGISTER: |
4211 |
case ISD::WRITE_REGISTER: |
| 4212 |
case AArch64ISD::MSRR: |
4212 |
case AArch64ISD::MSRR: |
| 4213 |
if (tryWriteRegister(Node)) |
4213 |
if (tryWriteRegister(Node)) |
| 4214 |
return; |
4214 |
return; |
| 4215 |
break; |
4215 |
break; |
| 4216 |
|
4216 |
|
| 4217 |
case ISD::LOAD: { |
4217 |
case ISD::LOAD: { |
| 4218 |
// Try to select as an indexed load. Fall through to normal processing |
4218 |
// Try to select as an indexed load. Fall through to normal processing |
| 4219 |
// if we can't. |
4219 |
// if we can't. |
| 4220 |
if (tryIndexedLoad(Node)) |
4220 |
if (tryIndexedLoad(Node)) |
| 4221 |
return; |
4221 |
return; |
| 4222 |
break; |
4222 |
break; |
| 4223 |
} |
4223 |
} |
| 4224 |
|
4224 |
|
| 4225 |
case ISD::SRL: |
4225 |
case ISD::SRL: |
| 4226 |
case ISD::AND: |
4226 |
case ISD::AND: |
| 4227 |
case ISD::SRA: |
4227 |
case ISD::SRA: |
| 4228 |
case ISD::SIGN_EXTEND_INREG: |
4228 |
case ISD::SIGN_EXTEND_INREG: |
| 4229 |
if (tryBitfieldExtractOp(Node)) |
4229 |
if (tryBitfieldExtractOp(Node)) |
| 4230 |
return; |
4230 |
return; |
| 4231 |
if (tryBitfieldInsertInZeroOp(Node)) |
4231 |
if (tryBitfieldInsertInZeroOp(Node)) |
| 4232 |
return; |
4232 |
return; |
| 4233 |
[[fallthrough]]; |
4233 |
[[fallthrough]]; |
| 4234 |
case ISD::ROTR: |
4234 |
case ISD::ROTR: |
| 4235 |
case ISD::SHL: |
4235 |
case ISD::SHL: |
| 4236 |
if (tryShiftAmountMod(Node)) |
4236 |
if (tryShiftAmountMod(Node)) |
| 4237 |
return; |
4237 |
return; |
| 4238 |
break; |
4238 |
break; |
| 4239 |
|
4239 |
|
| 4240 |
case ISD::SIGN_EXTEND: |
4240 |
case ISD::SIGN_EXTEND: |
| 4241 |
if (tryBitfieldExtractOpFromSExt(Node)) |
4241 |
if (tryBitfieldExtractOpFromSExt(Node)) |
| 4242 |
return; |
4242 |
return; |
| 4243 |
break; |
4243 |
break; |
| 4244 |
|
4244 |
|
| 4245 |
case ISD::OR: |
4245 |
case ISD::OR: |
| 4246 |
if (tryBitfieldInsertOp(Node)) |
4246 |
if (tryBitfieldInsertOp(Node)) |
| 4247 |
return; |
4247 |
return; |
| 4248 |
break; |
4248 |
break; |
| 4249 |
|
4249 |
|
| 4250 |
case ISD::EXTRACT_SUBVECTOR: { |
4250 |
case ISD::EXTRACT_SUBVECTOR: { |
| 4251 |
if (trySelectCastScalableToFixedLengthVector(Node)) |
4251 |
if (trySelectCastScalableToFixedLengthVector(Node)) |
| 4252 |
return; |
4252 |
return; |
| 4253 |
break; |
4253 |
break; |
| 4254 |
} |
4254 |
} |
| 4255 |
|
4255 |
|
| 4256 |
case ISD::INSERT_SUBVECTOR: { |
4256 |
case ISD::INSERT_SUBVECTOR: { |
| 4257 |
if (trySelectCastFixedLengthToScalableVector(Node)) |
4257 |
if (trySelectCastFixedLengthToScalableVector(Node)) |
| 4258 |
return; |
4258 |
return; |
| 4259 |
break; |
4259 |
break; |
| 4260 |
} |
4260 |
} |
| 4261 |
|
4261 |
|
| 4262 |
case ISD::Constant: { |
4262 |
case ISD::Constant: { |
| 4263 |
// Materialize zero constants as copies from WZR/XZR. This allows |
4263 |
// Materialize zero constants as copies from WZR/XZR. This allows |
| 4264 |
// the coalescer to propagate these into other instructions. |
4264 |
// the coalescer to propagate these into other instructions. |
| 4265 |
ConstantSDNode *ConstNode = cast(Node); |
4265 |
ConstantSDNode *ConstNode = cast(Node); |
| 4266 |
if (ConstNode->isZero()) { |
4266 |
if (ConstNode->isZero()) { |
| 4267 |
if (VT == MVT::i32) { |
4267 |
if (VT == MVT::i32) { |
| 4268 |
SDValue New = CurDAG->getCopyFromReg( |
4268 |
SDValue New = CurDAG->getCopyFromReg( |
| 4269 |
CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); |
4269 |
CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); |
| 4270 |
ReplaceNode(Node, New.getNode()); |
4270 |
ReplaceNode(Node, New.getNode()); |
| 4271 |
return; |
4271 |
return; |
| 4272 |
} else if (VT == MVT::i64) { |
4272 |
} else if (VT == MVT::i64) { |
| 4273 |
SDValue New = CurDAG->getCopyFromReg( |
4273 |
SDValue New = CurDAG->getCopyFromReg( |
| 4274 |
CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); |
4274 |
CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); |
| 4275 |
ReplaceNode(Node, New.getNode()); |
4275 |
ReplaceNode(Node, New.getNode()); |
| 4276 |
return; |
4276 |
return; |
| 4277 |
} |
4277 |
} |
| 4278 |
} |
4278 |
} |
| 4279 |
break; |
4279 |
break; |
| 4280 |
} |
4280 |
} |
| 4281 |
|
4281 |
|
| 4282 |
case ISD::FrameIndex: { |
4282 |
case ISD::FrameIndex: { |
| 4283 |
// Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. |
4283 |
// Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. |
| 4284 |
int FI = cast(Node)->getIndex(); |
4284 |
int FI = cast(Node)->getIndex(); |
| 4285 |
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); |
4285 |
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); |
| 4286 |
const TargetLowering *TLI = getTargetLowering(); |
4286 |
const TargetLowering *TLI = getTargetLowering(); |
| 4287 |
SDValue TFI = CurDAG->getTargetFrameIndex( |
4287 |
SDValue TFI = CurDAG->getTargetFrameIndex( |
| 4288 |
FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
4288 |
FI, TLI->getPointerTy(CurDAG->getDataLayout())); |
| 4289 |
SDLoc DL(Node); |
4289 |
SDLoc DL(Node); |
| 4290 |
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), |
4290 |
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), |
| 4291 |
CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; |
4291 |
CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; |
| 4292 |
CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); |
4292 |
CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); |
| 4293 |
return; |
4293 |
return; |
| 4294 |
} |
4294 |
} |
| 4295 |
case ISD::INTRINSIC_W_CHAIN: { |
4295 |
case ISD::INTRINSIC_W_CHAIN: { |
| 4296 |
unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); |
4296 |
unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); |
| 4297 |
switch (IntNo) { |
4297 |
switch (IntNo) { |
| 4298 |
default: |
4298 |
default: |
| 4299 |
break; |
4299 |
break; |
| 4300 |
case Intrinsic::aarch64_ldaxp: |
4300 |
case Intrinsic::aarch64_ldaxp: |
| 4301 |
case Intrinsic::aarch64_ldxp: { |
4301 |
case Intrinsic::aarch64_ldxp: { |
| 4302 |
unsigned Op = |
4302 |
unsigned Op = |
| 4303 |
IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; |
4303 |
IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; |
| 4304 |
SDValue MemAddr = Node->getOperand(2); |
4304 |
SDValue MemAddr = Node->getOperand(2); |
| 4305 |
SDLoc DL(Node); |
4305 |
SDLoc DL(Node); |
| 4306 |
SDValue Chain = Node->getOperand(0); |
4306 |
SDValue Chain = Node->getOperand(0); |
| 4307 |
|
4307 |
|
| 4308 |
SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, |
4308 |
SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, |
| 4309 |
MVT::Other, MemAddr, Chain); |
4309 |
MVT::Other, MemAddr, Chain); |
| 4310 |
|
4310 |
|
| 4311 |
// Transfer memoperands. |
4311 |
// Transfer memoperands. |
| 4312 |
MachineMemOperand *MemOp = |
4312 |
MachineMemOperand *MemOp = |
| 4313 |
cast(Node)->getMemOperand(); |
4313 |
cast(Node)->getMemOperand(); |
| 4314 |
CurDAG->setNodeMemRefs(cast(Ld), {MemOp}); |
4314 |
CurDAG->setNodeMemRefs(cast(Ld), {MemOp}); |
| 4315 |
ReplaceNode(Node, Ld); |
4315 |
ReplaceNode(Node, Ld); |
| 4316 |
return; |
4316 |
return; |
| 4317 |
} |
4317 |
} |
| 4318 |
case Intrinsic::aarch64_stlxp: |
4318 |
case Intrinsic::aarch64_stlxp: |
| 4319 |
case Intrinsic::aarch64_stxp: { |
4319 |
case Intrinsic::aarch64_stxp: { |
| 4320 |
unsigned Op = |
4320 |
unsigned Op = |
| 4321 |
IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; |
4321 |
IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; |
| 4322 |
SDLoc DL(Node); |
4322 |
SDLoc DL(Node); |
| 4323 |
SDValue Chain = Node->getOperand(0); |
4323 |
SDValue Chain = Node->getOperand(0); |
| 4324 |
SDValue ValLo = Node->getOperand(2); |
4324 |
SDValue ValLo = Node->getOperand(2); |
| 4325 |
SDValue ValHi = Node->getOperand(3); |
4325 |
SDValue ValHi = Node->getOperand(3); |
| 4326 |
SDValue MemAddr = Node->getOperand(4); |
4326 |
SDValue MemAddr = Node->getOperand(4); |
| 4327 |
|
4327 |
|
| 4328 |
// Place arguments in the right order. |
4328 |
// Place arguments in the right order. |
| 4329 |
SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; |
4329 |
SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; |
| 4330 |
|
4330 |
|
| 4331 |
SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); |
4331 |
SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); |
| 4332 |
// Transfer memoperands. |
4332 |
// Transfer memoperands. |
| 4333 |
MachineMemOperand *MemOp = |
4333 |
MachineMemOperand *MemOp = |
| 4334 |
cast(Node)->getMemOperand(); |
4334 |
cast(Node)->getMemOperand(); |
| 4335 |
CurDAG->setNodeMemRefs(cast(St), {MemOp}); |
4335 |
CurDAG->setNodeMemRefs(cast(St), {MemOp}); |
| 4336 |
|
4336 |
|
| 4337 |
ReplaceNode(Node, St); |
4337 |
ReplaceNode(Node, St); |
| 4338 |
return; |
4338 |
return; |
| 4339 |
} |
4339 |
} |
| 4340 |
case Intrinsic::aarch64_neon_ld1x2: |
4340 |
case Intrinsic::aarch64_neon_ld1x2: |
| 4341 |
if (VT == MVT::v8i8) { |
4341 |
if (VT == MVT::v8i8) { |
| 4342 |
SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); |
4342 |
SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); |
| 4343 |
return; |
4343 |
return; |
| 4344 |
} else if (VT == MVT::v16i8) { |
4344 |
} else if (VT == MVT::v16i8) { |
| 4345 |
SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); |
4345 |
SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); |
| 4346 |
return; |
4346 |
return; |
| 4347 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
4347 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 4348 |
SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); |
4348 |
SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); |
| 4349 |
return; |
4349 |
return; |
| 4350 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
4350 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 4351 |
SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); |
4351 |
SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); |
| 4352 |
return; |
4352 |
return; |
| 4353 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
4353 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 4354 |
SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); |
4354 |
SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); |
| 4355 |
return; |
4355 |
return; |
| 4356 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
4356 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 4357 |
SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); |
4357 |
SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); |
| 4358 |
return; |
4358 |
return; |
| 4359 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
4359 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 4360 |
SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); |
4360 |
SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); |
| 4361 |
return; |
4361 |
return; |
| 4362 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
4362 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 4363 |
SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); |
4363 |
SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); |
| 4364 |
return; |
4364 |
return; |
| 4365 |
} |
4365 |
} |
| 4366 |
break; |
4366 |
break; |
| 4367 |
case Intrinsic::aarch64_neon_ld1x3: |
4367 |
case Intrinsic::aarch64_neon_ld1x3: |
| 4368 |
if (VT == MVT::v8i8) { |
4368 |
if (VT == MVT::v8i8) { |
| 4369 |
SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); |
4369 |
SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); |
| 4370 |
return; |
4370 |
return; |
| 4371 |
} else if (VT == MVT::v16i8) { |
4371 |
} else if (VT == MVT::v16i8) { |
| 4372 |
SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); |
4372 |
SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); |
| 4373 |
return; |
4373 |
return; |
| 4374 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
4374 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 4375 |
SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); |
4375 |
SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); |
| 4376 |
return; |
4376 |
return; |
| 4377 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
4377 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 4378 |
SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); |
4378 |
SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); |
| 4379 |
return; |
4379 |
return; |
| 4380 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
4380 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 4381 |
SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); |
4381 |
SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); |
| 4382 |
return; |
4382 |
return; |
| 4383 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
4383 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 4384 |
SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); |
4384 |
SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); |
| 4385 |
return; |
4385 |
return; |
| 4386 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
4386 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 4387 |
SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); |
4387 |
SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); |
| 4388 |
return; |
4388 |
return; |
| 4389 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
4389 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 4390 |
SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); |
4390 |
SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); |
| 4391 |
return; |
4391 |
return; |
| 4392 |
} |
4392 |
} |
| 4393 |
break; |
4393 |
break; |
| 4394 |
case Intrinsic::aarch64_neon_ld1x4: |
4394 |
case Intrinsic::aarch64_neon_ld1x4: |
| 4395 |
if (VT == MVT::v8i8) { |
4395 |
if (VT == MVT::v8i8) { |
| 4396 |
SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); |
4396 |
SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); |
| 4397 |
return; |
4397 |
return; |
| 4398 |
} else if (VT == MVT::v16i8) { |
4398 |
} else if (VT == MVT::v16i8) { |
| 4399 |
SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); |
4399 |
SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); |
| 4400 |
return; |
4400 |
return; |
| 4401 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
4401 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 4402 |
SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); |
4402 |
SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); |
| 4403 |
return; |
4403 |
return; |
| 4404 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
4404 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 4405 |
SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); |
4405 |
SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); |
| 4406 |
return; |
4406 |
return; |
| 4407 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
4407 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 4408 |
SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); |
4408 |
SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); |
| 4409 |
return; |
4409 |
return; |
| 4410 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
4410 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 4411 |
SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); |
4411 |
SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); |
| 4412 |
return; |
4412 |
return; |
| 4413 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
4413 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 4414 |
SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); |
4414 |
SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); |
| 4415 |
return; |
4415 |
return; |
| 4416 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
4416 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 4417 |
SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); |
4417 |
SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); |
| 4418 |
return; |
4418 |
return; |
| 4419 |
} |
4419 |
} |
| 4420 |
break; |
4420 |
break; |
| 4421 |
case Intrinsic::aarch64_neon_ld2: |
4421 |
case Intrinsic::aarch64_neon_ld2: |
| 4422 |
if (VT == MVT::v8i8) { |
4422 |
if (VT == MVT::v8i8) { |
| 4423 |
SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); |
4423 |
SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); |
| 4424 |
return; |
4424 |
return; |
| 4425 |
} else if (VT == MVT::v16i8) { |
4425 |
} else if (VT == MVT::v16i8) { |
| 4426 |
SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); |
4426 |
SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); |
| 4427 |
return; |
4427 |
return; |
| 4428 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
4428 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 4429 |
SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); |
4429 |
SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); |
| 4430 |
return; |
4430 |
return; |
| 4431 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
4431 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 4432 |
SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); |
4432 |
SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); |
| 4433 |
return; |
4433 |
return; |
| 4434 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
4434 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 4435 |
SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); |
4435 |
SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); |
| 4436 |
return; |
4436 |
return; |
| 4437 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
4437 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 4438 |
SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); |
4438 |
SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); |
| 4439 |
return; |
4439 |
return; |
| 4440 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
4440 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 4441 |
SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); |
4441 |
SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); |
| 4442 |
return; |
4442 |
return; |
| 4443 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
4443 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 4444 |
SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); |
4444 |
SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); |
| 4445 |
return; |
4445 |
return; |
| 4446 |
} |
4446 |
} |
| 4447 |
break; |
4447 |
break; |
| 4448 |
case Intrinsic::aarch64_neon_ld3: |
4448 |
case Intrinsic::aarch64_neon_ld3: |
| 4449 |
if (VT == MVT::v8i8) { |
4449 |
if (VT == MVT::v8i8) { |
| 4450 |
SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); |
4450 |
SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); |
| 4451 |
return; |
4451 |
return; |
| 4452 |
} else if (VT == MVT::v16i8) { |
4452 |
} else if (VT == MVT::v16i8) { |
| 4453 |
SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); |
4453 |
SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); |
| 4454 |
return; |
4454 |
return; |
| 4455 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
4455 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 4456 |
SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); |
4456 |
SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); |
| 4457 |
return; |
4457 |
return; |
| 4458 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
4458 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 4459 |
SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); |
4459 |
SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); |
| 4460 |
return; |
4460 |
return; |
| 4461 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
4461 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 4462 |
SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); |
4462 |
SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); |
| 4463 |
return; |
4463 |
return; |
| 4464 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
4464 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 4465 |
SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); |
4465 |
SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); |
| 4466 |
return; |
4466 |
return; |
| 4467 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
4467 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 4468 |
SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); |
4468 |
SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); |
| 4469 |
return; |
4469 |
return; |
| 4470 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
4470 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 4471 |
SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); |
4471 |
SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); |
| 4472 |
return; |
4472 |
return; |
| 4473 |
} |
4473 |
} |
| 4474 |
break; |
4474 |
break; |
| 4475 |
case Intrinsic::aarch64_neon_ld4: |
4475 |
case Intrinsic::aarch64_neon_ld4: |
| 4476 |
if (VT == MVT::v8i8) { |
4476 |
if (VT == MVT::v8i8) { |
| 4477 |
SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); |
4477 |
SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); |
| 4478 |
return; |
4478 |
return; |
| 4479 |
} else if (VT == MVT::v16i8) { |
4479 |
} else if (VT == MVT::v16i8) { |
| 4480 |
SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); |
4480 |
SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); |
| 4481 |
return; |
4481 |
return; |
| 4482 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
4482 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 4483 |
SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); |
4483 |
SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); |
| 4484 |
return; |
4484 |
return; |
| 4485 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
4485 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 4486 |
SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); |
4486 |
SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); |
| 4487 |
return; |
4487 |
return; |
| 4488 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
4488 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 4489 |
SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); |
4489 |
SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); |
| 4490 |
return; |
4490 |
return; |
| 4491 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
4491 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 4492 |
SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); |
4492 |
SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); |
| 4493 |
return; |
4493 |
return; |
| 4494 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
4494 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 4495 |
SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); |
4495 |
SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); |
| 4496 |
return; |
4496 |
return; |
| 4497 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
4497 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 4498 |
SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); |
4498 |
SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); |
| 4499 |
return; |
4499 |
return; |
| 4500 |
} |
4500 |
} |
| 4501 |
break; |
4501 |
break; |
| 4502 |
case Intrinsic::aarch64_neon_ld2r: |
4502 |
case Intrinsic::aarch64_neon_ld2r: |
| 4503 |
if (VT == MVT::v8i8) { |
4503 |
if (VT == MVT::v8i8) { |
| 4504 |
SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); |
4504 |
SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); |
| 4505 |
return; |
4505 |
return; |
| 4506 |
} else if (VT == MVT::v16i8) { |
4506 |
} else if (VT == MVT::v16i8) { |
| 4507 |
SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); |
4507 |
SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); |
| 4508 |
return; |
4508 |
return; |
| 4509 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
4509 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 4510 |
SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); |
4510 |
SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); |
| 4511 |
return; |
4511 |
return; |
| 4512 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
4512 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 4513 |
SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); |
4513 |
SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); |
| 4514 |
return; |
4514 |
return; |
| 4515 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
4515 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 4516 |
SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); |
4516 |
SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); |
| 4517 |
return; |
4517 |
return; |
| 4518 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
4518 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 4519 |
SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); |
4519 |
SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); |
| 4520 |
return; |
4520 |
return; |
| 4521 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
4521 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 4522 |
SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); |
4522 |
SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); |
| 4523 |
return; |
4523 |
return; |
| 4524 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
4524 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 4525 |
SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); |
4525 |
SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); |
| 4526 |
return; |
4526 |
return; |
| 4527 |
} |
4527 |
} |
| 4528 |
break; |
4528 |
break; |
| 4529 |
case Intrinsic::aarch64_neon_ld3r: |
4529 |
case Intrinsic::aarch64_neon_ld3r: |
| 4530 |
if (VT == MVT::v8i8) { |
4530 |
if (VT == MVT::v8i8) { |
| 4531 |
SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); |
4531 |
SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); |
| 4532 |
return; |
4532 |
return; |
| 4533 |
} else if (VT == MVT::v16i8) { |
4533 |
} else if (VT == MVT::v16i8) { |
| 4534 |
SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); |
4534 |
SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); |
| 4535 |
return; |
4535 |
return; |
| 4536 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
4536 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 4537 |
SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); |
4537 |
SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); |
| 4538 |
return; |
4538 |
return; |
| 4539 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
4539 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 4540 |
SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); |
4540 |
SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); |
| 4541 |
return; |
4541 |
return; |
| 4542 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
4542 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 4543 |
SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); |
4543 |
SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); |
| 4544 |
return; |
4544 |
return; |
| 4545 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
4545 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 4546 |
SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); |
4546 |
SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); |
| 4547 |
return; |
4547 |
return; |
| 4548 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
4548 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 4549 |
SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); |
4549 |
SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); |
| 4550 |
return; |
4550 |
return; |
| 4551 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
4551 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 4552 |
SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); |
4552 |
SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); |
| 4553 |
return; |
4553 |
return; |
| 4554 |
} |
4554 |
} |
| 4555 |
break; |
4555 |
break; |
| 4556 |
case Intrinsic::aarch64_neon_ld4r: |
4556 |
case Intrinsic::aarch64_neon_ld4r: |
| 4557 |
if (VT == MVT::v8i8) { |
4557 |
if (VT == MVT::v8i8) { |
| 4558 |
SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); |
4558 |
SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); |
| 4559 |
return; |
4559 |
return; |
| 4560 |
} else if (VT == MVT::v16i8) { |
4560 |
} else if (VT == MVT::v16i8) { |
| 4561 |
SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); |
4561 |
SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); |
| 4562 |
return; |
4562 |
return; |
| 4563 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
4563 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 4564 |
SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); |
4564 |
SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); |
| 4565 |
return; |
4565 |
return; |
| 4566 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
4566 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 4567 |
SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); |
4567 |
SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); |
| 4568 |
return; |
4568 |
return; |
| 4569 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
4569 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 4570 |
SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); |
4570 |
SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); |
| 4571 |
return; |
4571 |
return; |
| 4572 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
4572 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 4573 |
SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); |
4573 |
SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); |
| 4574 |
return; |
4574 |
return; |
| 4575 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
4575 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 4576 |
SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); |
4576 |
SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); |
| 4577 |
return; |
4577 |
return; |
| 4578 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
4578 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 4579 |
SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); |
4579 |
SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); |
| 4580 |
return; |
4580 |
return; |
| 4581 |
} |
4581 |
} |
| 4582 |
break; |
4582 |
break; |
| 4583 |
case Intrinsic::aarch64_neon_ld2lane: |
4583 |
case Intrinsic::aarch64_neon_ld2lane: |
| 4584 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
4584 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 4585 |
SelectLoadLane(Node, 2, AArch64::LD2i8); |
4585 |
SelectLoadLane(Node, 2, AArch64::LD2i8); |
| 4586 |
return; |
4586 |
return; |
| 4587 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
4587 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 4588 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
4588 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 4589 |
SelectLoadLane(Node, 2, AArch64::LD2i16); |
4589 |
SelectLoadLane(Node, 2, AArch64::LD2i16); |
| 4590 |
return; |
4590 |
return; |
| 4591 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
4591 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 4592 |
VT == MVT::v2f32) { |
4592 |
VT == MVT::v2f32) { |
| 4593 |
SelectLoadLane(Node, 2, AArch64::LD2i32); |
4593 |
SelectLoadLane(Node, 2, AArch64::LD2i32); |
| 4594 |
return; |
4594 |
return; |
| 4595 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
4595 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 4596 |
VT == MVT::v1f64) { |
4596 |
VT == MVT::v1f64) { |
| 4597 |
SelectLoadLane(Node, 2, AArch64::LD2i64); |
4597 |
SelectLoadLane(Node, 2, AArch64::LD2i64); |
| 4598 |
return; |
4598 |
return; |
| 4599 |
} |
4599 |
} |
| 4600 |
break; |
4600 |
break; |
| 4601 |
case Intrinsic::aarch64_neon_ld3lane: |
4601 |
case Intrinsic::aarch64_neon_ld3lane: |
| 4602 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
4602 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 4603 |
SelectLoadLane(Node, 3, AArch64::LD3i8); |
4603 |
SelectLoadLane(Node, 3, AArch64::LD3i8); |
| 4604 |
return; |
4604 |
return; |
| 4605 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
4605 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 4606 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
4606 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 4607 |
SelectLoadLane(Node, 3, AArch64::LD3i16); |
4607 |
SelectLoadLane(Node, 3, AArch64::LD3i16); |
| 4608 |
return; |
4608 |
return; |
| 4609 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
4609 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 4610 |
VT == MVT::v2f32) { |
4610 |
VT == MVT::v2f32) { |
| 4611 |
SelectLoadLane(Node, 3, AArch64::LD3i32); |
4611 |
SelectLoadLane(Node, 3, AArch64::LD3i32); |
| 4612 |
return; |
4612 |
return; |
| 4613 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
4613 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 4614 |
VT == MVT::v1f64) { |
4614 |
VT == MVT::v1f64) { |
| 4615 |
SelectLoadLane(Node, 3, AArch64::LD3i64); |
4615 |
SelectLoadLane(Node, 3, AArch64::LD3i64); |
| 4616 |
return; |
4616 |
return; |
| 4617 |
} |
4617 |
} |
| 4618 |
break; |
4618 |
break; |
| 4619 |
case Intrinsic::aarch64_neon_ld4lane: |
4619 |
case Intrinsic::aarch64_neon_ld4lane: |
| 4620 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
4620 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 4621 |
SelectLoadLane(Node, 4, AArch64::LD4i8); |
4621 |
SelectLoadLane(Node, 4, AArch64::LD4i8); |
| 4622 |
return; |
4622 |
return; |
| 4623 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
4623 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 4624 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
4624 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 4625 |
SelectLoadLane(Node, 4, AArch64::LD4i16); |
4625 |
SelectLoadLane(Node, 4, AArch64::LD4i16); |
| 4626 |
return; |
4626 |
return; |
| 4627 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
4627 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 4628 |
VT == MVT::v2f32) { |
4628 |
VT == MVT::v2f32) { |
| 4629 |
SelectLoadLane(Node, 4, AArch64::LD4i32); |
4629 |
SelectLoadLane(Node, 4, AArch64::LD4i32); |
| 4630 |
return; |
4630 |
return; |
| 4631 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
4631 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 4632 |
VT == MVT::v1f64) { |
4632 |
VT == MVT::v1f64) { |
| 4633 |
SelectLoadLane(Node, 4, AArch64::LD4i64); |
4633 |
SelectLoadLane(Node, 4, AArch64::LD4i64); |
| 4634 |
return; |
4634 |
return; |
| 4635 |
} |
4635 |
} |
| 4636 |
break; |
4636 |
break; |
| 4637 |
case Intrinsic::aarch64_ld64b: |
4637 |
case Intrinsic::aarch64_ld64b: |
| 4638 |
SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); |
4638 |
SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); |
| 4639 |
return; |
4639 |
return; |
| 4640 |
case Intrinsic::aarch64_sve_ld2_sret: { |
4640 |
case Intrinsic::aarch64_sve_ld2_sret: { |
| 4641 |
if (VT == MVT::nxv16i8) { |
4641 |
if (VT == MVT::nxv16i8) { |
| 4642 |
SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, |
4642 |
SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, |
| 4643 |
true); |
4643 |
true); |
| 4644 |
return; |
4644 |
return; |
| 4645 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
4645 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 4646 |
VT == MVT::nxv8bf16) { |
4646 |
VT == MVT::nxv8bf16) { |
| 4647 |
SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, |
4647 |
SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, |
| 4648 |
true); |
4648 |
true); |
| 4649 |
return; |
4649 |
return; |
| 4650 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
4650 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 4651 |
SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W, |
4651 |
SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W, |
| 4652 |
true); |
4652 |
true); |
| 4653 |
return; |
4653 |
return; |
| 4654 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
4654 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 4655 |
SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D, |
4655 |
SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D, |
| 4656 |
true); |
4656 |
true); |
| 4657 |
return; |
4657 |
return; |
| 4658 |
} |
4658 |
} |
| 4659 |
break; |
4659 |
break; |
| 4660 |
} |
4660 |
} |
| 4661 |
case Intrinsic::aarch64_sve_ld1_pn_x2: { |
4661 |
case Intrinsic::aarch64_sve_ld1_pn_x2: { |
| 4662 |
if (VT == MVT::nxv16i8) { |
4662 |
if (VT == MVT::nxv16i8) { |
| 4663 |
SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z); |
4663 |
SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z); |
| 4664 |
return; |
4664 |
return; |
| 4665 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
4665 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 4666 |
VT == MVT::nxv8bf16) { |
4666 |
VT == MVT::nxv8bf16) { |
| 4667 |
SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z); |
4667 |
SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z); |
| 4668 |
return; |
4668 |
return; |
| 4669 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
4669 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 4670 |
SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z); |
4670 |
SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z); |
| 4671 |
return; |
4671 |
return; |
| 4672 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
4672 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 4673 |
SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z); |
4673 |
SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z); |
| 4674 |
return; |
4674 |
return; |
| 4675 |
} |
4675 |
} |
| 4676 |
break; |
4676 |
break; |
| 4677 |
} |
4677 |
} |
| 4678 |
case Intrinsic::aarch64_sve_ld1_pn_x4: { |
4678 |
case Intrinsic::aarch64_sve_ld1_pn_x4: { |
| 4679 |
if (VT == MVT::nxv16i8) { |
4679 |
if (VT == MVT::nxv16i8) { |
| 4680 |
SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z); |
4680 |
SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z); |
| 4681 |
return; |
4681 |
return; |
| 4682 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
4682 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 4683 |
VT == MVT::nxv8bf16) { |
4683 |
VT == MVT::nxv8bf16) { |
| 4684 |
SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z); |
4684 |
SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z); |
| 4685 |
return; |
4685 |
return; |
| 4686 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
4686 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 4687 |
SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z); |
4687 |
SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z); |
| 4688 |
return; |
4688 |
return; |
| 4689 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
4689 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 4690 |
SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z); |
4690 |
SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z); |
| 4691 |
return; |
4691 |
return; |
| 4692 |
} |
4692 |
} |
| 4693 |
break; |
4693 |
break; |
| 4694 |
} |
4694 |
} |
| 4695 |
case Intrinsic::aarch64_sve_ldnt1_pn_x2: { |
4695 |
case Intrinsic::aarch64_sve_ldnt1_pn_x2: { |
| 4696 |
if (VT == MVT::nxv16i8) { |
4696 |
if (VT == MVT::nxv16i8) { |
| 4697 |
SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z); |
4697 |
SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z); |
| 4698 |
return; |
4698 |
return; |
| 4699 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
4699 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 4700 |
VT == MVT::nxv8bf16) { |
4700 |
VT == MVT::nxv8bf16) { |
| 4701 |
SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z); |
4701 |
SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z); |
| 4702 |
return; |
4702 |
return; |
| 4703 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
4703 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 4704 |
SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z); |
4704 |
SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z); |
| 4705 |
return; |
4705 |
return; |
| 4706 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
4706 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 4707 |
SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z); |
4707 |
SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z); |
| 4708 |
return; |
4708 |
return; |
| 4709 |
} |
4709 |
} |
| 4710 |
break; |
4710 |
break; |
| 4711 |
} |
4711 |
} |
| 4712 |
case Intrinsic::aarch64_sve_ldnt1_pn_x4: { |
4712 |
case Intrinsic::aarch64_sve_ldnt1_pn_x4: { |
| 4713 |
if (VT == MVT::nxv16i8) { |
4713 |
if (VT == MVT::nxv16i8) { |
| 4714 |
SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z); |
4714 |
SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z); |
| 4715 |
return; |
4715 |
return; |
| 4716 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
4716 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 4717 |
VT == MVT::nxv8bf16) { |
4717 |
VT == MVT::nxv8bf16) { |
| 4718 |
SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z); |
4718 |
SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z); |
| 4719 |
return; |
4719 |
return; |
| 4720 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
4720 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 4721 |
SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z); |
4721 |
SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z); |
| 4722 |
return; |
4722 |
return; |
| 4723 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
4723 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 4724 |
SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z); |
4724 |
SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z); |
| 4725 |
return; |
4725 |
return; |
| 4726 |
} |
4726 |
} |
| 4727 |
break; |
4727 |
break; |
| 4728 |
} |
4728 |
} |
| 4729 |
case Intrinsic::aarch64_sve_ld3_sret: { |
4729 |
case Intrinsic::aarch64_sve_ld3_sret: { |
| 4730 |
if (VT == MVT::nxv16i8) { |
4730 |
if (VT == MVT::nxv16i8) { |
| 4731 |
SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, |
4731 |
SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, |
| 4732 |
true); |
4732 |
true); |
| 4733 |
return; |
4733 |
return; |
| 4734 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
4734 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 4735 |
VT == MVT::nxv8bf16) { |
4735 |
VT == MVT::nxv8bf16) { |
| 4736 |
SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, |
4736 |
SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, |
| 4737 |
true); |
4737 |
true); |
| 4738 |
return; |
4738 |
return; |
| 4739 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
4739 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 4740 |
SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W, |
4740 |
SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W, |
| 4741 |
true); |
4741 |
true); |
| 4742 |
return; |
4742 |
return; |
| 4743 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
4743 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 4744 |
SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, |
4744 |
SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, |
| 4745 |
true); |
4745 |
true); |
| 4746 |
return; |
4746 |
return; |
| 4747 |
} |
4747 |
} |
| 4748 |
break; |
4748 |
break; |
| 4749 |
} |
4749 |
} |
| 4750 |
case Intrinsic::aarch64_sve_ld4_sret: { |
4750 |
case Intrinsic::aarch64_sve_ld4_sret: { |
| 4751 |
if (VT == MVT::nxv16i8) { |
4751 |
if (VT == MVT::nxv16i8) { |
| 4752 |
SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, |
4752 |
SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, |
| 4753 |
true); |
4753 |
true); |
| 4754 |
return; |
4754 |
return; |
| 4755 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
4755 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 4756 |
VT == MVT::nxv8bf16) { |
4756 |
VT == MVT::nxv8bf16) { |
| 4757 |
SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, |
4757 |
SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, |
| 4758 |
true); |
4758 |
true); |
| 4759 |
return; |
4759 |
return; |
| 4760 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
4760 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 4761 |
SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, |
4761 |
SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, |
| 4762 |
true); |
4762 |
true); |
| 4763 |
return; |
4763 |
return; |
| 4764 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
4764 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 4765 |
SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, |
4765 |
SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, |
| 4766 |
true); |
4766 |
true); |
| 4767 |
return; |
4767 |
return; |
| 4768 |
} |
4768 |
} |
| 4769 |
break; |
4769 |
break; |
| 4770 |
} |
4770 |
} |
| 4771 |
case Intrinsic::aarch64_sme_read_hor_vg2: { |
4771 |
case Intrinsic::aarch64_sme_read_hor_vg2: { |
| 4772 |
if (VT == MVT::nxv16i8) { |
4772 |
if (VT == MVT::nxv16i8) { |
| 4773 |
SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0, |
4773 |
SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0, |
| 4774 |
AArch64::MOVA_2ZMXI_H_B); |
4774 |
AArch64::MOVA_2ZMXI_H_B); |
| 4775 |
return; |
4775 |
return; |
| 4776 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
4776 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 4777 |
VT == MVT::nxv8bf16) { |
4777 |
VT == MVT::nxv8bf16) { |
| 4778 |
SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0, |
4778 |
SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0, |
| 4779 |
AArch64::MOVA_2ZMXI_H_H); |
4779 |
AArch64::MOVA_2ZMXI_H_H); |
| 4780 |
return; |
4780 |
return; |
| 4781 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
4781 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 4782 |
SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0, |
4782 |
SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0, |
| 4783 |
AArch64::MOVA_2ZMXI_H_S); |
4783 |
AArch64::MOVA_2ZMXI_H_S); |
| 4784 |
return; |
4784 |
return; |
| 4785 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
4785 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 4786 |
SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0, |
4786 |
SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0, |
| 4787 |
AArch64::MOVA_2ZMXI_H_D); |
4787 |
AArch64::MOVA_2ZMXI_H_D); |
| 4788 |
return; |
4788 |
return; |
| 4789 |
} |
4789 |
} |
| 4790 |
break; |
4790 |
break; |
| 4791 |
} |
4791 |
} |
| 4792 |
case Intrinsic::aarch64_sme_read_ver_vg2: { |
4792 |
case Intrinsic::aarch64_sme_read_ver_vg2: { |
| 4793 |
if (VT == MVT::nxv16i8) { |
4793 |
if (VT == MVT::nxv16i8) { |
| 4794 |
SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0, |
4794 |
SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0, |
| 4795 |
AArch64::MOVA_2ZMXI_V_B); |
4795 |
AArch64::MOVA_2ZMXI_V_B); |
| 4796 |
return; |
4796 |
return; |
| 4797 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
4797 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 4798 |
VT == MVT::nxv8bf16) { |
4798 |
VT == MVT::nxv8bf16) { |
| 4799 |
SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0, |
4799 |
SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0, |
| 4800 |
AArch64::MOVA_2ZMXI_V_H); |
4800 |
AArch64::MOVA_2ZMXI_V_H); |
| 4801 |
return; |
4801 |
return; |
| 4802 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
4802 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 4803 |
SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0, |
4803 |
SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0, |
| 4804 |
AArch64::MOVA_2ZMXI_V_S); |
4804 |
AArch64::MOVA_2ZMXI_V_S); |
| 4805 |
return; |
4805 |
return; |
| 4806 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
4806 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 4807 |
SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0, |
4807 |
SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0, |
| 4808 |
AArch64::MOVA_2ZMXI_V_D); |
4808 |
AArch64::MOVA_2ZMXI_V_D); |
| 4809 |
return; |
4809 |
return; |
| 4810 |
} |
4810 |
} |
| 4811 |
break; |
4811 |
break; |
| 4812 |
} |
4812 |
} |
| 4813 |
case Intrinsic::aarch64_sme_read_hor_vg4: { |
4813 |
case Intrinsic::aarch64_sme_read_hor_vg4: { |
| 4814 |
if (VT == MVT::nxv16i8) { |
4814 |
if (VT == MVT::nxv16i8) { |
| 4815 |
SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0, |
4815 |
SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0, |
| 4816 |
AArch64::MOVA_4ZMXI_H_B); |
4816 |
AArch64::MOVA_4ZMXI_H_B); |
| 4817 |
return; |
4817 |
return; |
| 4818 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
4818 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 4819 |
VT == MVT::nxv8bf16) { |
4819 |
VT == MVT::nxv8bf16) { |
| 4820 |
SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0, |
4820 |
SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0, |
| 4821 |
AArch64::MOVA_4ZMXI_H_H); |
4821 |
AArch64::MOVA_4ZMXI_H_H); |
| 4822 |
return; |
4822 |
return; |
| 4823 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
4823 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 4824 |
SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0, |
4824 |
SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0, |
| 4825 |
AArch64::MOVA_4ZMXI_H_S); |
4825 |
AArch64::MOVA_4ZMXI_H_S); |
| 4826 |
return; |
4826 |
return; |
| 4827 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
4827 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 4828 |
SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0, |
4828 |
SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0, |
| 4829 |
AArch64::MOVA_4ZMXI_H_D); |
4829 |
AArch64::MOVA_4ZMXI_H_D); |
| 4830 |
return; |
4830 |
return; |
| 4831 |
} |
4831 |
} |
| 4832 |
break; |
4832 |
break; |
| 4833 |
} |
4833 |
} |
| 4834 |
case Intrinsic::aarch64_sme_read_ver_vg4: { |
4834 |
case Intrinsic::aarch64_sme_read_ver_vg4: { |
| 4835 |
if (VT == MVT::nxv16i8) { |
4835 |
if (VT == MVT::nxv16i8) { |
| 4836 |
SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0, |
4836 |
SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0, |
| 4837 |
AArch64::MOVA_4ZMXI_V_B); |
4837 |
AArch64::MOVA_4ZMXI_V_B); |
| 4838 |
return; |
4838 |
return; |
| 4839 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
4839 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 4840 |
VT == MVT::nxv8bf16) { |
4840 |
VT == MVT::nxv8bf16) { |
| 4841 |
SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0, |
4841 |
SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0, |
| 4842 |
AArch64::MOVA_4ZMXI_V_H); |
4842 |
AArch64::MOVA_4ZMXI_V_H); |
| 4843 |
return; |
4843 |
return; |
| 4844 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
4844 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 4845 |
SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0, |
4845 |
SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0, |
| 4846 |
AArch64::MOVA_4ZMXI_V_S); |
4846 |
AArch64::MOVA_4ZMXI_V_S); |
| 4847 |
return; |
4847 |
return; |
| 4848 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
4848 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 4849 |
SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0, |
4849 |
SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0, |
| 4850 |
AArch64::MOVA_4ZMXI_V_D); |
4850 |
AArch64::MOVA_4ZMXI_V_D); |
| 4851 |
return; |
4851 |
return; |
| 4852 |
} |
4852 |
} |
| 4853 |
break; |
4853 |
break; |
| 4854 |
} |
4854 |
} |
| 4855 |
case Intrinsic::aarch64_sme_read_vg1x2: { |
4855 |
case Intrinsic::aarch64_sme_read_vg1x2: { |
| 4856 |
SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA, |
4856 |
SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA, |
| 4857 |
AArch64::MOVA_VG2_2ZMXI); |
4857 |
AArch64::MOVA_VG2_2ZMXI); |
| 4858 |
return; |
4858 |
return; |
| 4859 |
} |
4859 |
} |
| 4860 |
case Intrinsic::aarch64_sme_read_vg1x4: { |
4860 |
case Intrinsic::aarch64_sme_read_vg1x4: { |
| 4861 |
SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA, |
4861 |
SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA, |
| 4862 |
AArch64::MOVA_VG4_4ZMXI); |
4862 |
AArch64::MOVA_VG4_4ZMXI); |
| 4863 |
return; |
4863 |
return; |
| 4864 |
} |
4864 |
} |
| 4865 |
case Intrinsic::swift_async_context_addr: { |
4865 |
case Intrinsic::swift_async_context_addr: { |
| 4866 |
SDLoc DL(Node); |
4866 |
SDLoc DL(Node); |
| 4867 |
SDValue Chain = Node->getOperand(0); |
4867 |
SDValue Chain = Node->getOperand(0); |
| 4868 |
SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64); |
4868 |
SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64); |
| 4869 |
SDValue Res = SDValue( |
4869 |
SDValue Res = SDValue( |
| 4870 |
CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP, |
4870 |
CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP, |
| 4871 |
CurDAG->getTargetConstant(8, DL, MVT::i32), |
4871 |
CurDAG->getTargetConstant(8, DL, MVT::i32), |
| 4872 |
CurDAG->getTargetConstant(0, DL, MVT::i32)), |
4872 |
CurDAG->getTargetConstant(0, DL, MVT::i32)), |
| 4873 |
0); |
4873 |
0); |
| 4874 |
ReplaceUses(SDValue(Node, 0), Res); |
4874 |
ReplaceUses(SDValue(Node, 0), Res); |
| 4875 |
ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1)); |
4875 |
ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1)); |
| 4876 |
CurDAG->RemoveDeadNode(Node); |
4876 |
CurDAG->RemoveDeadNode(Node); |
| 4877 |
|
4877 |
|
| 4878 |
auto &MF = CurDAG->getMachineFunction(); |
4878 |
auto &MF = CurDAG->getMachineFunction(); |
| 4879 |
MF.getFrameInfo().setFrameAddressIsTaken(true); |
4879 |
MF.getFrameInfo().setFrameAddressIsTaken(true); |
| 4880 |
MF.getInfo()->setHasSwiftAsyncContext(true); |
4880 |
MF.getInfo()->setHasSwiftAsyncContext(true); |
| 4881 |
return; |
4881 |
return; |
| 4882 |
} |
4882 |
} |
| 4883 |
} |
4883 |
} |
| 4884 |
} break; |
4884 |
} break; |
| 4885 |
case ISD::INTRINSIC_WO_CHAIN: { |
4885 |
case ISD::INTRINSIC_WO_CHAIN: { |
| 4886 |
unsigned IntNo = cast(Node->getOperand(0))->getZExtValue(); |
4886 |
unsigned IntNo = cast(Node->getOperand(0))->getZExtValue(); |
| 4887 |
switch (IntNo) { |
4887 |
switch (IntNo) { |
| 4888 |
default: |
4888 |
default: |
| 4889 |
break; |
4889 |
break; |
| 4890 |
case Intrinsic::aarch64_tagp: |
4890 |
case Intrinsic::aarch64_tagp: |
| 4891 |
SelectTagP(Node); |
4891 |
SelectTagP(Node); |
| 4892 |
return; |
4892 |
return; |
| 4893 |
case Intrinsic::aarch64_neon_tbl2: |
4893 |
case Intrinsic::aarch64_neon_tbl2: |
| 4894 |
SelectTable(Node, 2, |
4894 |
SelectTable(Node, 2, |
| 4895 |
VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, |
4895 |
VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, |
| 4896 |
false); |
4896 |
false); |
| 4897 |
return; |
4897 |
return; |
| 4898 |
case Intrinsic::aarch64_neon_tbl3: |
4898 |
case Intrinsic::aarch64_neon_tbl3: |
| 4899 |
SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three |
4899 |
SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three |
| 4900 |
: AArch64::TBLv16i8Three, |
4900 |
: AArch64::TBLv16i8Three, |
| 4901 |
false); |
4901 |
false); |
| 4902 |
return; |
4902 |
return; |
| 4903 |
case Intrinsic::aarch64_neon_tbl4: |
4903 |
case Intrinsic::aarch64_neon_tbl4: |
| 4904 |
SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four |
4904 |
SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four |
| 4905 |
: AArch64::TBLv16i8Four, |
4905 |
: AArch64::TBLv16i8Four, |
| 4906 |
false); |
4906 |
false); |
| 4907 |
return; |
4907 |
return; |
| 4908 |
case Intrinsic::aarch64_neon_tbx2: |
4908 |
case Intrinsic::aarch64_neon_tbx2: |
| 4909 |
SelectTable(Node, 2, |
4909 |
SelectTable(Node, 2, |
| 4910 |
VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, |
4910 |
VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, |
| 4911 |
true); |
4911 |
true); |
| 4912 |
return; |
4912 |
return; |
| 4913 |
case Intrinsic::aarch64_neon_tbx3: |
4913 |
case Intrinsic::aarch64_neon_tbx3: |
| 4914 |
SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three |
4914 |
SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three |
| 4915 |
: AArch64::TBXv16i8Three, |
4915 |
: AArch64::TBXv16i8Three, |
| 4916 |
true); |
4916 |
true); |
| 4917 |
return; |
4917 |
return; |
| 4918 |
case Intrinsic::aarch64_neon_tbx4: |
4918 |
case Intrinsic::aarch64_neon_tbx4: |
| 4919 |
SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four |
4919 |
SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four |
| 4920 |
: AArch64::TBXv16i8Four, |
4920 |
: AArch64::TBXv16i8Four, |
| 4921 |
true); |
4921 |
true); |
| 4922 |
return; |
4922 |
return; |
| 4923 |
case Intrinsic::aarch64_sve_srshl_single_x2: |
4923 |
case Intrinsic::aarch64_sve_srshl_single_x2: |
| 4924 |
if (auto Op = SelectOpcodeFromVT( |
4924 |
if (auto Op = SelectOpcodeFromVT( |
| 4925 |
Node->getValueType(0), |
4925 |
Node->getValueType(0), |
| 4926 |
{AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H, |
4926 |
{AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H, |
| 4927 |
AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D})) |
4927 |
AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D})) |
| 4928 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
4928 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 4929 |
return; |
4929 |
return; |
| 4930 |
case Intrinsic::aarch64_sve_srshl_single_x4: |
4930 |
case Intrinsic::aarch64_sve_srshl_single_x4: |
| 4931 |
if (auto Op = SelectOpcodeFromVT( |
4931 |
if (auto Op = SelectOpcodeFromVT( |
| 4932 |
Node->getValueType(0), |
4932 |
Node->getValueType(0), |
| 4933 |
{AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H, |
4933 |
{AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H, |
| 4934 |
AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D})) |
4934 |
AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D})) |
| 4935 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
4935 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 4936 |
return; |
4936 |
return; |
| 4937 |
case Intrinsic::aarch64_sve_urshl_single_x2: |
4937 |
case Intrinsic::aarch64_sve_urshl_single_x2: |
| 4938 |
if (auto Op = SelectOpcodeFromVT( |
4938 |
if (auto Op = SelectOpcodeFromVT( |
| 4939 |
Node->getValueType(0), |
4939 |
Node->getValueType(0), |
| 4940 |
{AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H, |
4940 |
{AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H, |
| 4941 |
AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D})) |
4941 |
AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D})) |
| 4942 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
4942 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 4943 |
return; |
4943 |
return; |
| 4944 |
case Intrinsic::aarch64_sve_urshl_single_x4: |
4944 |
case Intrinsic::aarch64_sve_urshl_single_x4: |
| 4945 |
if (auto Op = SelectOpcodeFromVT( |
4945 |
if (auto Op = SelectOpcodeFromVT( |
| 4946 |
Node->getValueType(0), |
4946 |
Node->getValueType(0), |
| 4947 |
{AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H, |
4947 |
{AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H, |
| 4948 |
AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D})) |
4948 |
AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D})) |
| 4949 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
4949 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 4950 |
return; |
4950 |
return; |
| 4951 |
case Intrinsic::aarch64_sve_srshl_x2: |
4951 |
case Intrinsic::aarch64_sve_srshl_x2: |
| 4952 |
if (auto Op = SelectOpcodeFromVT( |
4952 |
if (auto Op = SelectOpcodeFromVT( |
| 4953 |
Node->getValueType(0), |
4953 |
Node->getValueType(0), |
| 4954 |
{AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H, |
4954 |
{AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H, |
| 4955 |
AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D})) |
4955 |
AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D})) |
| 4956 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
4956 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
| 4957 |
return; |
4957 |
return; |
| 4958 |
case Intrinsic::aarch64_sve_srshl_x4: |
4958 |
case Intrinsic::aarch64_sve_srshl_x4: |
| 4959 |
if (auto Op = SelectOpcodeFromVT( |
4959 |
if (auto Op = SelectOpcodeFromVT( |
| 4960 |
Node->getValueType(0), |
4960 |
Node->getValueType(0), |
| 4961 |
{AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H, |
4961 |
{AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H, |
| 4962 |
AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D})) |
4962 |
AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D})) |
| 4963 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
4963 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
| 4964 |
return; |
4964 |
return; |
| 4965 |
case Intrinsic::aarch64_sve_urshl_x2: |
4965 |
case Intrinsic::aarch64_sve_urshl_x2: |
| 4966 |
if (auto Op = SelectOpcodeFromVT( |
4966 |
if (auto Op = SelectOpcodeFromVT( |
| 4967 |
Node->getValueType(0), |
4967 |
Node->getValueType(0), |
| 4968 |
{AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H, |
4968 |
{AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H, |
| 4969 |
AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D})) |
4969 |
AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D})) |
| 4970 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
4970 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
| 4971 |
return; |
4971 |
return; |
| 4972 |
case Intrinsic::aarch64_sve_urshl_x4: |
4972 |
case Intrinsic::aarch64_sve_urshl_x4: |
| 4973 |
if (auto Op = SelectOpcodeFromVT( |
4973 |
if (auto Op = SelectOpcodeFromVT( |
| 4974 |
Node->getValueType(0), |
4974 |
Node->getValueType(0), |
| 4975 |
{AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H, |
4975 |
{AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H, |
| 4976 |
AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D})) |
4976 |
AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D})) |
| 4977 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
4977 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
| 4978 |
return; |
4978 |
return; |
| 4979 |
case Intrinsic::aarch64_sve_sqdmulh_single_vgx2: |
4979 |
case Intrinsic::aarch64_sve_sqdmulh_single_vgx2: |
| 4980 |
if (auto Op = SelectOpcodeFromVT( |
4980 |
if (auto Op = SelectOpcodeFromVT( |
| 4981 |
Node->getValueType(0), |
4981 |
Node->getValueType(0), |
| 4982 |
{AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H, |
4982 |
{AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H, |
| 4983 |
AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D})) |
4983 |
AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D})) |
| 4984 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
4984 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 4985 |
return; |
4985 |
return; |
| 4986 |
case Intrinsic::aarch64_sve_sqdmulh_single_vgx4: |
4986 |
case Intrinsic::aarch64_sve_sqdmulh_single_vgx4: |
| 4987 |
if (auto Op = SelectOpcodeFromVT( |
4987 |
if (auto Op = SelectOpcodeFromVT( |
| 4988 |
Node->getValueType(0), |
4988 |
Node->getValueType(0), |
| 4989 |
{AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H, |
4989 |
{AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H, |
| 4990 |
AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D})) |
4990 |
AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D})) |
| 4991 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
4991 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 4992 |
return; |
4992 |
return; |
| 4993 |
case Intrinsic::aarch64_sve_sqdmulh_vgx2: |
4993 |
case Intrinsic::aarch64_sve_sqdmulh_vgx2: |
| 4994 |
if (auto Op = SelectOpcodeFromVT( |
4994 |
if (auto Op = SelectOpcodeFromVT( |
| 4995 |
Node->getValueType(0), |
4995 |
Node->getValueType(0), |
| 4996 |
{AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H, |
4996 |
{AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H, |
| 4997 |
AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D})) |
4997 |
AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D})) |
| 4998 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
4998 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
| 4999 |
return; |
4999 |
return; |
| 5000 |
case Intrinsic::aarch64_sve_sqdmulh_vgx4: |
5000 |
case Intrinsic::aarch64_sve_sqdmulh_vgx4: |
| 5001 |
if (auto Op = SelectOpcodeFromVT( |
5001 |
if (auto Op = SelectOpcodeFromVT( |
| 5002 |
Node->getValueType(0), |
5002 |
Node->getValueType(0), |
| 5003 |
{AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H, |
5003 |
{AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H, |
| 5004 |
AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D})) |
5004 |
AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D})) |
| 5005 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
5005 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
| 5006 |
return; |
5006 |
return; |
| 5007 |
case Intrinsic::aarch64_sve_whilege_x2: |
5007 |
case Intrinsic::aarch64_sve_whilege_x2: |
| 5008 |
if (auto Op = SelectOpcodeFromVT( |
5008 |
if (auto Op = SelectOpcodeFromVT( |
| 5009 |
Node->getValueType(0), |
5009 |
Node->getValueType(0), |
| 5010 |
{AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H, |
5010 |
{AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H, |
| 5011 |
AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D})) |
5011 |
AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D})) |
| 5012 |
SelectWhilePair(Node, Op); |
5012 |
SelectWhilePair(Node, Op); |
| 5013 |
return; |
5013 |
return; |
| 5014 |
case Intrinsic::aarch64_sve_whilegt_x2: |
5014 |
case Intrinsic::aarch64_sve_whilegt_x2: |
| 5015 |
if (auto Op = SelectOpcodeFromVT( |
5015 |
if (auto Op = SelectOpcodeFromVT( |
| 5016 |
Node->getValueType(0), |
5016 |
Node->getValueType(0), |
| 5017 |
{AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H, |
5017 |
{AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H, |
| 5018 |
AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D})) |
5018 |
AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D})) |
| 5019 |
SelectWhilePair(Node, Op); |
5019 |
SelectWhilePair(Node, Op); |
| 5020 |
return; |
5020 |
return; |
| 5021 |
case Intrinsic::aarch64_sve_whilehi_x2: |
5021 |
case Intrinsic::aarch64_sve_whilehi_x2: |
| 5022 |
if (auto Op = SelectOpcodeFromVT( |
5022 |
if (auto Op = SelectOpcodeFromVT( |
| 5023 |
Node->getValueType(0), |
5023 |
Node->getValueType(0), |
| 5024 |
{AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H, |
5024 |
{AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H, |
| 5025 |
AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D})) |
5025 |
AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D})) |
| 5026 |
SelectWhilePair(Node, Op); |
5026 |
SelectWhilePair(Node, Op); |
| 5027 |
return; |
5027 |
return; |
| 5028 |
case Intrinsic::aarch64_sve_whilehs_x2: |
5028 |
case Intrinsic::aarch64_sve_whilehs_x2: |
| 5029 |
if (auto Op = SelectOpcodeFromVT( |
5029 |
if (auto Op = SelectOpcodeFromVT( |
| 5030 |
Node->getValueType(0), |
5030 |
Node->getValueType(0), |
| 5031 |
{AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H, |
5031 |
{AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H, |
| 5032 |
AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D})) |
5032 |
AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D})) |
| 5033 |
SelectWhilePair(Node, Op); |
5033 |
SelectWhilePair(Node, Op); |
| 5034 |
return; |
5034 |
return; |
| 5035 |
case Intrinsic::aarch64_sve_whilele_x2: |
5035 |
case Intrinsic::aarch64_sve_whilele_x2: |
| 5036 |
if (auto Op = SelectOpcodeFromVT( |
5036 |
if (auto Op = SelectOpcodeFromVT( |
| 5037 |
Node->getValueType(0), |
5037 |
Node->getValueType(0), |
| 5038 |
{AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H, |
5038 |
{AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H, |
| 5039 |
AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D})) |
5039 |
AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D})) |
| 5040 |
SelectWhilePair(Node, Op); |
5040 |
SelectWhilePair(Node, Op); |
| 5041 |
return; |
5041 |
return; |
| 5042 |
case Intrinsic::aarch64_sve_whilelo_x2: |
5042 |
case Intrinsic::aarch64_sve_whilelo_x2: |
| 5043 |
if (auto Op = SelectOpcodeFromVT( |
5043 |
if (auto Op = SelectOpcodeFromVT( |
| 5044 |
Node->getValueType(0), |
5044 |
Node->getValueType(0), |
| 5045 |
{AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H, |
5045 |
{AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H, |
| 5046 |
AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D})) |
5046 |
AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D})) |
| 5047 |
SelectWhilePair(Node, Op); |
5047 |
SelectWhilePair(Node, Op); |
| 5048 |
return; |
5048 |
return; |
| 5049 |
case Intrinsic::aarch64_sve_whilels_x2: |
5049 |
case Intrinsic::aarch64_sve_whilels_x2: |
| 5050 |
if (auto Op = SelectOpcodeFromVT( |
5050 |
if (auto Op = SelectOpcodeFromVT( |
| 5051 |
Node->getValueType(0), |
5051 |
Node->getValueType(0), |
| 5052 |
{AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H, |
5052 |
{AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H, |
| 5053 |
AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D})) |
5053 |
AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D})) |
| 5054 |
SelectWhilePair(Node, Op); |
5054 |
SelectWhilePair(Node, Op); |
| 5055 |
return; |
5055 |
return; |
| 5056 |
case Intrinsic::aarch64_sve_whilelt_x2: |
5056 |
case Intrinsic::aarch64_sve_whilelt_x2: |
| 5057 |
if (auto Op = SelectOpcodeFromVT( |
5057 |
if (auto Op = SelectOpcodeFromVT( |
| 5058 |
Node->getValueType(0), |
5058 |
Node->getValueType(0), |
| 5059 |
{AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H, |
5059 |
{AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H, |
| 5060 |
AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D})) |
5060 |
AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D})) |
| 5061 |
SelectWhilePair(Node, Op); |
5061 |
SelectWhilePair(Node, Op); |
| 5062 |
return; |
5062 |
return; |
| 5063 |
case Intrinsic::aarch64_sve_smax_single_x2: |
5063 |
case Intrinsic::aarch64_sve_smax_single_x2: |
| 5064 |
if (auto Op = SelectOpcodeFromVT( |
5064 |
if (auto Op = SelectOpcodeFromVT( |
| 5065 |
Node->getValueType(0), |
5065 |
Node->getValueType(0), |
| 5066 |
{AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H, |
5066 |
{AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H, |
| 5067 |
AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D})) |
5067 |
AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D})) |
| 5068 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
5068 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 5069 |
return; |
5069 |
return; |
| 5070 |
case Intrinsic::aarch64_sve_umax_single_x2: |
5070 |
case Intrinsic::aarch64_sve_umax_single_x2: |
| 5071 |
if (auto Op = SelectOpcodeFromVT( |
5071 |
if (auto Op = SelectOpcodeFromVT( |
| 5072 |
Node->getValueType(0), |
5072 |
Node->getValueType(0), |
| 5073 |
{AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H, |
5073 |
{AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H, |
| 5074 |
AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D})) |
5074 |
AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D})) |
| 5075 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
5075 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 5076 |
return; |
5076 |
return; |
| 5077 |
case Intrinsic::aarch64_sve_fmax_single_x2: |
5077 |
case Intrinsic::aarch64_sve_fmax_single_x2: |
| 5078 |
if (auto Op = SelectOpcodeFromVT( |
5078 |
if (auto Op = SelectOpcodeFromVT( |
| 5079 |
Node->getValueType(0), |
5079 |
Node->getValueType(0), |
| 5080 |
{0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S, |
5080 |
{0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S, |
| 5081 |
AArch64::FMAX_VG2_2ZZ_D})) |
5081 |
AArch64::FMAX_VG2_2ZZ_D})) |
| 5082 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
5082 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 5083 |
return; |
5083 |
return; |
| 5084 |
case Intrinsic::aarch64_sve_smax_single_x4: |
5084 |
case Intrinsic::aarch64_sve_smax_single_x4: |
| 5085 |
if (auto Op = SelectOpcodeFromVT( |
5085 |
if (auto Op = SelectOpcodeFromVT( |
| 5086 |
Node->getValueType(0), |
5086 |
Node->getValueType(0), |
| 5087 |
{AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H, |
5087 |
{AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H, |
| 5088 |
AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D})) |
5088 |
AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D})) |
| 5089 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
5089 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 5090 |
return; |
5090 |
return; |
| 5091 |
case Intrinsic::aarch64_sve_umax_single_x4: |
5091 |
case Intrinsic::aarch64_sve_umax_single_x4: |
| 5092 |
if (auto Op = SelectOpcodeFromVT( |
5092 |
if (auto Op = SelectOpcodeFromVT( |
| 5093 |
Node->getValueType(0), |
5093 |
Node->getValueType(0), |
| 5094 |
{AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H, |
5094 |
{AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H, |
| 5095 |
AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D})) |
5095 |
AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D})) |
| 5096 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
5096 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 5097 |
return; |
5097 |
return; |
| 5098 |
case Intrinsic::aarch64_sve_fmax_single_x4: |
5098 |
case Intrinsic::aarch64_sve_fmax_single_x4: |
| 5099 |
if (auto Op = SelectOpcodeFromVT( |
5099 |
if (auto Op = SelectOpcodeFromVT( |
| 5100 |
Node->getValueType(0), |
5100 |
Node->getValueType(0), |
| 5101 |
{0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S, |
5101 |
{0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S, |
| 5102 |
AArch64::FMAX_VG4_4ZZ_D})) |
5102 |
AArch64::FMAX_VG4_4ZZ_D})) |
| 5103 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
5103 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 5104 |
return; |
5104 |
return; |
| 5105 |
case Intrinsic::aarch64_sve_smin_single_x2: |
5105 |
case Intrinsic::aarch64_sve_smin_single_x2: |
| 5106 |
if (auto Op = SelectOpcodeFromVT( |
5106 |
if (auto Op = SelectOpcodeFromVT( |
| 5107 |
Node->getValueType(0), |
5107 |
Node->getValueType(0), |
| 5108 |
{AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H, |
5108 |
{AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H, |
| 5109 |
AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D})) |
5109 |
AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D})) |
| 5110 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
5110 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 5111 |
return; |
5111 |
return; |
| 5112 |
case Intrinsic::aarch64_sve_umin_single_x2: |
5112 |
case Intrinsic::aarch64_sve_umin_single_x2: |
| 5113 |
if (auto Op = SelectOpcodeFromVT( |
5113 |
if (auto Op = SelectOpcodeFromVT( |
| 5114 |
Node->getValueType(0), |
5114 |
Node->getValueType(0), |
| 5115 |
{AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H, |
5115 |
{AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H, |
| 5116 |
AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D})) |
5116 |
AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D})) |
| 5117 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
5117 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 5118 |
return; |
5118 |
return; |
| 5119 |
case Intrinsic::aarch64_sve_fmin_single_x2: |
5119 |
case Intrinsic::aarch64_sve_fmin_single_x2: |
| 5120 |
if (auto Op = SelectOpcodeFromVT( |
5120 |
if (auto Op = SelectOpcodeFromVT( |
| 5121 |
Node->getValueType(0), |
5121 |
Node->getValueType(0), |
| 5122 |
{0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S, |
5122 |
{0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S, |
| 5123 |
AArch64::FMIN_VG2_2ZZ_D})) |
5123 |
AArch64::FMIN_VG2_2ZZ_D})) |
| 5124 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
5124 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 5125 |
return; |
5125 |
return; |
| 5126 |
case Intrinsic::aarch64_sve_smin_single_x4: |
5126 |
case Intrinsic::aarch64_sve_smin_single_x4: |
| 5127 |
if (auto Op = SelectOpcodeFromVT( |
5127 |
if (auto Op = SelectOpcodeFromVT( |
| 5128 |
Node->getValueType(0), |
5128 |
Node->getValueType(0), |
| 5129 |
{AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H, |
5129 |
{AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H, |
| 5130 |
AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D})) |
5130 |
AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D})) |
| 5131 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
5131 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 5132 |
return; |
5132 |
return; |
| 5133 |
case Intrinsic::aarch64_sve_umin_single_x4: |
5133 |
case Intrinsic::aarch64_sve_umin_single_x4: |
| 5134 |
if (auto Op = SelectOpcodeFromVT( |
5134 |
if (auto Op = SelectOpcodeFromVT( |
| 5135 |
Node->getValueType(0), |
5135 |
Node->getValueType(0), |
| 5136 |
{AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H, |
5136 |
{AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H, |
| 5137 |
AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D})) |
5137 |
AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D})) |
| 5138 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
5138 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 5139 |
return; |
5139 |
return; |
| 5140 |
case Intrinsic::aarch64_sve_fmin_single_x4: |
5140 |
case Intrinsic::aarch64_sve_fmin_single_x4: |
| 5141 |
if (auto Op = SelectOpcodeFromVT( |
5141 |
if (auto Op = SelectOpcodeFromVT( |
| 5142 |
Node->getValueType(0), |
5142 |
Node->getValueType(0), |
| 5143 |
{0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S, |
5143 |
{0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S, |
| 5144 |
AArch64::FMIN_VG4_4ZZ_D})) |
5144 |
AArch64::FMIN_VG4_4ZZ_D})) |
| 5145 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
5145 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 5146 |
return; |
5146 |
return; |
| 5147 |
case Intrinsic::aarch64_sve_smax_x2: |
5147 |
case Intrinsic::aarch64_sve_smax_x2: |
| 5148 |
if (auto Op = SelectOpcodeFromVT( |
5148 |
if (auto Op = SelectOpcodeFromVT( |
| 5149 |
Node->getValueType(0), |
5149 |
Node->getValueType(0), |
| 5150 |
{AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H, |
5150 |
{AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H, |
| 5151 |
AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D})) |
5151 |
AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D})) |
| 5152 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
5152 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
| 5153 |
return; |
5153 |
return; |
| 5154 |
case Intrinsic::aarch64_sve_umax_x2: |
5154 |
case Intrinsic::aarch64_sve_umax_x2: |
| 5155 |
if (auto Op = SelectOpcodeFromVT( |
5155 |
if (auto Op = SelectOpcodeFromVT( |
| 5156 |
Node->getValueType(0), |
5156 |
Node->getValueType(0), |
| 5157 |
{AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H, |
5157 |
{AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H, |
| 5158 |
AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D})) |
5158 |
AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D})) |
| 5159 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
5159 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
| 5160 |
return; |
5160 |
return; |
| 5161 |
case Intrinsic::aarch64_sve_fmax_x2: |
5161 |
case Intrinsic::aarch64_sve_fmax_x2: |
| 5162 |
if (auto Op = SelectOpcodeFromVT( |
5162 |
if (auto Op = SelectOpcodeFromVT( |
| 5163 |
Node->getValueType(0), |
5163 |
Node->getValueType(0), |
| 5164 |
{0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S, |
5164 |
{0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S, |
| 5165 |
AArch64::FMAX_VG2_2Z2Z_D})) |
5165 |
AArch64::FMAX_VG2_2Z2Z_D})) |
| 5166 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
5166 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
| 5167 |
return; |
5167 |
return; |
| 5168 |
case Intrinsic::aarch64_sve_smax_x4: |
5168 |
case Intrinsic::aarch64_sve_smax_x4: |
| 5169 |
if (auto Op = SelectOpcodeFromVT( |
5169 |
if (auto Op = SelectOpcodeFromVT( |
| 5170 |
Node->getValueType(0), |
5170 |
Node->getValueType(0), |
| 5171 |
{AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H, |
5171 |
{AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H, |
| 5172 |
AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D})) |
5172 |
AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D})) |
| 5173 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
5173 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
| 5174 |
return; |
5174 |
return; |
| 5175 |
case Intrinsic::aarch64_sve_umax_x4: |
5175 |
case Intrinsic::aarch64_sve_umax_x4: |
| 5176 |
if (auto Op = SelectOpcodeFromVT( |
5176 |
if (auto Op = SelectOpcodeFromVT( |
| 5177 |
Node->getValueType(0), |
5177 |
Node->getValueType(0), |
| 5178 |
{AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H, |
5178 |
{AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H, |
| 5179 |
AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D})) |
5179 |
AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D})) |
| 5180 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
5180 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
| 5181 |
return; |
5181 |
return; |
| 5182 |
case Intrinsic::aarch64_sve_fmax_x4: |
5182 |
case Intrinsic::aarch64_sve_fmax_x4: |
| 5183 |
if (auto Op = SelectOpcodeFromVT( |
5183 |
if (auto Op = SelectOpcodeFromVT( |
| 5184 |
Node->getValueType(0), |
5184 |
Node->getValueType(0), |
| 5185 |
{0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S, |
5185 |
{0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S, |
| 5186 |
AArch64::FMAX_VG4_4Z4Z_D})) |
5186 |
AArch64::FMAX_VG4_4Z4Z_D})) |
| 5187 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
5187 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
| 5188 |
return; |
5188 |
return; |
| 5189 |
case Intrinsic::aarch64_sve_smin_x2: |
5189 |
case Intrinsic::aarch64_sve_smin_x2: |
| 5190 |
if (auto Op = SelectOpcodeFromVT( |
5190 |
if (auto Op = SelectOpcodeFromVT( |
| 5191 |
Node->getValueType(0), |
5191 |
Node->getValueType(0), |
| 5192 |
{AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H, |
5192 |
{AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H, |
| 5193 |
AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D})) |
5193 |
AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D})) |
| 5194 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
5194 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
| 5195 |
return; |
5195 |
return; |
| 5196 |
case Intrinsic::aarch64_sve_umin_x2: |
5196 |
case Intrinsic::aarch64_sve_umin_x2: |
| 5197 |
if (auto Op = SelectOpcodeFromVT( |
5197 |
if (auto Op = SelectOpcodeFromVT( |
| 5198 |
Node->getValueType(0), |
5198 |
Node->getValueType(0), |
| 5199 |
{AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H, |
5199 |
{AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H, |
| 5200 |
AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D})) |
5200 |
AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D})) |
| 5201 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
5201 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
| 5202 |
return; |
5202 |
return; |
| 5203 |
case Intrinsic::aarch64_sve_fmin_x2: |
5203 |
case Intrinsic::aarch64_sve_fmin_x2: |
| 5204 |
if (auto Op = SelectOpcodeFromVT( |
5204 |
if (auto Op = SelectOpcodeFromVT( |
| 5205 |
Node->getValueType(0), |
5205 |
Node->getValueType(0), |
| 5206 |
{0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S, |
5206 |
{0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S, |
| 5207 |
AArch64::FMIN_VG2_2Z2Z_D})) |
5207 |
AArch64::FMIN_VG2_2Z2Z_D})) |
| 5208 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
5208 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
| 5209 |
return; |
5209 |
return; |
| 5210 |
case Intrinsic::aarch64_sve_smin_x4: |
5210 |
case Intrinsic::aarch64_sve_smin_x4: |
| 5211 |
if (auto Op = SelectOpcodeFromVT( |
5211 |
if (auto Op = SelectOpcodeFromVT( |
| 5212 |
Node->getValueType(0), |
5212 |
Node->getValueType(0), |
| 5213 |
{AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H, |
5213 |
{AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H, |
| 5214 |
AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D})) |
5214 |
AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D})) |
| 5215 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
5215 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
| 5216 |
return; |
5216 |
return; |
| 5217 |
case Intrinsic::aarch64_sve_umin_x4: |
5217 |
case Intrinsic::aarch64_sve_umin_x4: |
| 5218 |
if (auto Op = SelectOpcodeFromVT( |
5218 |
if (auto Op = SelectOpcodeFromVT( |
| 5219 |
Node->getValueType(0), |
5219 |
Node->getValueType(0), |
| 5220 |
{AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H, |
5220 |
{AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H, |
| 5221 |
AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D})) |
5221 |
AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D})) |
| 5222 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
5222 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
| 5223 |
return; |
5223 |
return; |
| 5224 |
case Intrinsic::aarch64_sve_fmin_x4: |
5224 |
case Intrinsic::aarch64_sve_fmin_x4: |
| 5225 |
if (auto Op = SelectOpcodeFromVT( |
5225 |
if (auto Op = SelectOpcodeFromVT( |
| 5226 |
Node->getValueType(0), |
5226 |
Node->getValueType(0), |
| 5227 |
{0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S, |
5227 |
{0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S, |
| 5228 |
AArch64::FMIN_VG4_4Z4Z_D})) |
5228 |
AArch64::FMIN_VG4_4Z4Z_D})) |
| 5229 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
5229 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
| 5230 |
return; |
5230 |
return; |
| 5231 |
case Intrinsic::aarch64_sve_fmaxnm_single_x2 : |
5231 |
case Intrinsic::aarch64_sve_fmaxnm_single_x2 : |
| 5232 |
if (auto Op = SelectOpcodeFromVT( |
5232 |
if (auto Op = SelectOpcodeFromVT( |
| 5233 |
Node->getValueType(0), |
5233 |
Node->getValueType(0), |
| 5234 |
{0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S, |
5234 |
{0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S, |
| 5235 |
AArch64::FMAXNM_VG2_2ZZ_D})) |
5235 |
AArch64::FMAXNM_VG2_2ZZ_D})) |
| 5236 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
5236 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 5237 |
return; |
5237 |
return; |
| 5238 |
case Intrinsic::aarch64_sve_fmaxnm_single_x4 : |
5238 |
case Intrinsic::aarch64_sve_fmaxnm_single_x4 : |
| 5239 |
if (auto Op = SelectOpcodeFromVT( |
5239 |
if (auto Op = SelectOpcodeFromVT( |
| 5240 |
Node->getValueType(0), |
5240 |
Node->getValueType(0), |
| 5241 |
{0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S, |
5241 |
{0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S, |
| 5242 |
AArch64::FMAXNM_VG4_4ZZ_D})) |
5242 |
AArch64::FMAXNM_VG4_4ZZ_D})) |
| 5243 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
5243 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 5244 |
return; |
5244 |
return; |
| 5245 |
case Intrinsic::aarch64_sve_fminnm_single_x2: |
5245 |
case Intrinsic::aarch64_sve_fminnm_single_x2: |
| 5246 |
if (auto Op = SelectOpcodeFromVT( |
5246 |
if (auto Op = SelectOpcodeFromVT( |
| 5247 |
Node->getValueType(0), |
5247 |
Node->getValueType(0), |
| 5248 |
{0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S, |
5248 |
{0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S, |
| 5249 |
AArch64::FMINNM_VG2_2ZZ_D})) |
5249 |
AArch64::FMINNM_VG2_2ZZ_D})) |
| 5250 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
5250 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 5251 |
return; |
5251 |
return; |
| 5252 |
case Intrinsic::aarch64_sve_fminnm_single_x4: |
5252 |
case Intrinsic::aarch64_sve_fminnm_single_x4: |
| 5253 |
if (auto Op = SelectOpcodeFromVT( |
5253 |
if (auto Op = SelectOpcodeFromVT( |
| 5254 |
Node->getValueType(0), |
5254 |
Node->getValueType(0), |
| 5255 |
{0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S, |
5255 |
{0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S, |
| 5256 |
AArch64::FMINNM_VG4_4ZZ_D})) |
5256 |
AArch64::FMINNM_VG4_4ZZ_D})) |
| 5257 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
5257 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 5258 |
return; |
5258 |
return; |
| 5259 |
case Intrinsic::aarch64_sve_fmaxnm_x2: |
5259 |
case Intrinsic::aarch64_sve_fmaxnm_x2: |
| 5260 |
if (auto Op = SelectOpcodeFromVT( |
5260 |
if (auto Op = SelectOpcodeFromVT( |
| 5261 |
Node->getValueType(0), |
5261 |
Node->getValueType(0), |
| 5262 |
{0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S, |
5262 |
{0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S, |
| 5263 |
AArch64::FMAXNM_VG2_2Z2Z_D})) |
5263 |
AArch64::FMAXNM_VG2_2Z2Z_D})) |
| 5264 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
5264 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
| 5265 |
return; |
5265 |
return; |
| 5266 |
case Intrinsic::aarch64_sve_fmaxnm_x4: |
5266 |
case Intrinsic::aarch64_sve_fmaxnm_x4: |
| 5267 |
if (auto Op = SelectOpcodeFromVT( |
5267 |
if (auto Op = SelectOpcodeFromVT( |
| 5268 |
Node->getValueType(0), |
5268 |
Node->getValueType(0), |
| 5269 |
{0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S, |
5269 |
{0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S, |
| 5270 |
AArch64::FMAXNM_VG4_4Z4Z_D})) |
5270 |
AArch64::FMAXNM_VG4_4Z4Z_D})) |
| 5271 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
5271 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
| 5272 |
return; |
5272 |
return; |
| 5273 |
case Intrinsic::aarch64_sve_fminnm_x2: |
5273 |
case Intrinsic::aarch64_sve_fminnm_x2: |
| 5274 |
if (auto Op = SelectOpcodeFromVT( |
5274 |
if (auto Op = SelectOpcodeFromVT( |
| 5275 |
Node->getValueType(0), |
5275 |
Node->getValueType(0), |
| 5276 |
{0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S, |
5276 |
{0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S, |
| 5277 |
AArch64::FMINNM_VG2_2Z2Z_D})) |
5277 |
AArch64::FMINNM_VG2_2Z2Z_D})) |
| 5278 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
5278 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op); |
| 5279 |
return; |
5279 |
return; |
| 5280 |
case Intrinsic::aarch64_sve_fminnm_x4: |
5280 |
case Intrinsic::aarch64_sve_fminnm_x4: |
| 5281 |
if (auto Op = SelectOpcodeFromVT( |
5281 |
if (auto Op = SelectOpcodeFromVT( |
| 5282 |
Node->getValueType(0), |
5282 |
Node->getValueType(0), |
| 5283 |
{0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S, |
5283 |
{0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S, |
| 5284 |
AArch64::FMINNM_VG4_4Z4Z_D})) |
5284 |
AArch64::FMINNM_VG4_4Z4Z_D})) |
| 5285 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
5285 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); |
| 5286 |
return; |
5286 |
return; |
| 5287 |
case Intrinsic::aarch64_sve_fcvts_x2: |
5287 |
case Intrinsic::aarch64_sve_fcvts_x2: |
| 5288 |
SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); |
5288 |
SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); |
| 5289 |
return; |
5289 |
return; |
| 5290 |
case Intrinsic::aarch64_sve_scvtf_x2: |
5290 |
case Intrinsic::aarch64_sve_scvtf_x2: |
| 5291 |
SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS); |
5291 |
SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS); |
| 5292 |
return; |
5292 |
return; |
| 5293 |
case Intrinsic::aarch64_sve_fcvtu_x2: |
5293 |
case Intrinsic::aarch64_sve_fcvtu_x2: |
| 5294 |
SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS); |
5294 |
SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS); |
| 5295 |
return; |
5295 |
return; |
| 5296 |
case Intrinsic::aarch64_sve_ucvtf_x2: |
5296 |
case Intrinsic::aarch64_sve_ucvtf_x2: |
| 5297 |
SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS); |
5297 |
SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS); |
| 5298 |
return; |
5298 |
return; |
| 5299 |
case Intrinsic::aarch64_sve_fcvts_x4: |
5299 |
case Intrinsic::aarch64_sve_fcvts_x4: |
| 5300 |
SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS); |
5300 |
SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS); |
| 5301 |
return; |
5301 |
return; |
| 5302 |
case Intrinsic::aarch64_sve_scvtf_x4: |
5302 |
case Intrinsic::aarch64_sve_scvtf_x4: |
| 5303 |
SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS); |
5303 |
SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS); |
| 5304 |
return; |
5304 |
return; |
| 5305 |
case Intrinsic::aarch64_sve_fcvtu_x4: |
5305 |
case Intrinsic::aarch64_sve_fcvtu_x4: |
| 5306 |
SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS); |
5306 |
SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS); |
| 5307 |
return; |
5307 |
return; |
| 5308 |
case Intrinsic::aarch64_sve_ucvtf_x4: |
5308 |
case Intrinsic::aarch64_sve_ucvtf_x4: |
| 5309 |
SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS); |
5309 |
SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS); |
| 5310 |
return; |
5310 |
return; |
| 5311 |
case Intrinsic::aarch64_sve_sclamp_single_x2: |
5311 |
case Intrinsic::aarch64_sve_sclamp_single_x2: |
| 5312 |
if (auto Op = SelectOpcodeFromVT( |
5312 |
if (auto Op = SelectOpcodeFromVT( |
| 5313 |
Node->getValueType(0), |
5313 |
Node->getValueType(0), |
| 5314 |
{AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H, |
5314 |
{AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H, |
| 5315 |
AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D})) |
5315 |
AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D})) |
| 5316 |
SelectClamp(Node, 2, Op); |
5316 |
SelectClamp(Node, 2, Op); |
| 5317 |
return; |
5317 |
return; |
| 5318 |
case Intrinsic::aarch64_sve_uclamp_single_x2: |
5318 |
case Intrinsic::aarch64_sve_uclamp_single_x2: |
| 5319 |
if (auto Op = SelectOpcodeFromVT( |
5319 |
if (auto Op = SelectOpcodeFromVT( |
| 5320 |
Node->getValueType(0), |
5320 |
Node->getValueType(0), |
| 5321 |
{AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H, |
5321 |
{AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H, |
| 5322 |
AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D})) |
5322 |
AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D})) |
| 5323 |
SelectClamp(Node, 2, Op); |
5323 |
SelectClamp(Node, 2, Op); |
| 5324 |
return; |
5324 |
return; |
| 5325 |
case Intrinsic::aarch64_sve_fclamp_single_x2: |
5325 |
case Intrinsic::aarch64_sve_fclamp_single_x2: |
| 5326 |
if (auto Op = SelectOpcodeFromVT( |
5326 |
if (auto Op = SelectOpcodeFromVT( |
| 5327 |
Node->getValueType(0), |
5327 |
Node->getValueType(0), |
| 5328 |
{0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S, |
5328 |
{0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S, |
| 5329 |
AArch64::FCLAMP_VG2_2Z2Z_D})) |
5329 |
AArch64::FCLAMP_VG2_2Z2Z_D})) |
| 5330 |
SelectClamp(Node, 2, Op); |
5330 |
SelectClamp(Node, 2, Op); |
| 5331 |
return; |
5331 |
return; |
| 5332 |
case Intrinsic::aarch64_sve_sclamp_single_x4: |
5332 |
case Intrinsic::aarch64_sve_sclamp_single_x4: |
| 5333 |
if (auto Op = SelectOpcodeFromVT( |
5333 |
if (auto Op = SelectOpcodeFromVT( |
| 5334 |
Node->getValueType(0), |
5334 |
Node->getValueType(0), |
| 5335 |
{AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H, |
5335 |
{AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H, |
| 5336 |
AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D})) |
5336 |
AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D})) |
| 5337 |
SelectClamp(Node, 4, Op); |
5337 |
SelectClamp(Node, 4, Op); |
| 5338 |
return; |
5338 |
return; |
| 5339 |
case Intrinsic::aarch64_sve_uclamp_single_x4: |
5339 |
case Intrinsic::aarch64_sve_uclamp_single_x4: |
| 5340 |
if (auto Op = SelectOpcodeFromVT( |
5340 |
if (auto Op = SelectOpcodeFromVT( |
| 5341 |
Node->getValueType(0), |
5341 |
Node->getValueType(0), |
| 5342 |
{AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H, |
5342 |
{AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H, |
| 5343 |
AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D})) |
5343 |
AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D})) |
| 5344 |
SelectClamp(Node, 4, Op); |
5344 |
SelectClamp(Node, 4, Op); |
| 5345 |
return; |
5345 |
return; |
| 5346 |
case Intrinsic::aarch64_sve_fclamp_single_x4: |
5346 |
case Intrinsic::aarch64_sve_fclamp_single_x4: |
| 5347 |
if (auto Op = SelectOpcodeFromVT( |
5347 |
if (auto Op = SelectOpcodeFromVT( |
| 5348 |
Node->getValueType(0), |
5348 |
Node->getValueType(0), |
| 5349 |
{0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S, |
5349 |
{0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S, |
| 5350 |
AArch64::FCLAMP_VG4_4Z4Z_D})) |
5350 |
AArch64::FCLAMP_VG4_4Z4Z_D})) |
| 5351 |
SelectClamp(Node, 4, Op); |
5351 |
SelectClamp(Node, 4, Op); |
| 5352 |
return; |
5352 |
return; |
| 5353 |
case Intrinsic::aarch64_sve_add_single_x2: |
5353 |
case Intrinsic::aarch64_sve_add_single_x2: |
| 5354 |
if (auto Op = SelectOpcodeFromVT( |
5354 |
if (auto Op = SelectOpcodeFromVT( |
| 5355 |
Node->getValueType(0), |
5355 |
Node->getValueType(0), |
| 5356 |
{AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H, |
5356 |
{AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H, |
| 5357 |
AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D})) |
5357 |
AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D})) |
| 5358 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
5358 |
SelectDestructiveMultiIntrinsic(Node, 2, false, Op); |
| 5359 |
return; |
5359 |
return; |
| 5360 |
case Intrinsic::aarch64_sve_add_single_x4: |
5360 |
case Intrinsic::aarch64_sve_add_single_x4: |
| 5361 |
if (auto Op = SelectOpcodeFromVT( |
5361 |
if (auto Op = SelectOpcodeFromVT( |
| 5362 |
Node->getValueType(0), |
5362 |
Node->getValueType(0), |
| 5363 |
{AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H, |
5363 |
{AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H, |
| 5364 |
AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D})) |
5364 |
AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D})) |
| 5365 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
5365 |
SelectDestructiveMultiIntrinsic(Node, 4, false, Op); |
| 5366 |
return; |
5366 |
return; |
| 5367 |
case Intrinsic::aarch64_sve_zip_x2: |
5367 |
case Intrinsic::aarch64_sve_zip_x2: |
| 5368 |
if (auto Op = SelectOpcodeFromVT( |
5368 |
if (auto Op = SelectOpcodeFromVT( |
| 5369 |
Node->getValueType(0), |
5369 |
Node->getValueType(0), |
| 5370 |
{AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H, |
5370 |
{AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H, |
| 5371 |
AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D})) |
5371 |
AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D})) |
| 5372 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); |
5372 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); |
| 5373 |
return; |
5373 |
return; |
| 5374 |
case Intrinsic::aarch64_sve_zipq_x2: |
5374 |
case Intrinsic::aarch64_sve_zipq_x2: |
| 5375 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, |
5375 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, |
| 5376 |
AArch64::ZIP_VG2_2ZZZ_Q); |
5376 |
AArch64::ZIP_VG2_2ZZZ_Q); |
| 5377 |
return; |
5377 |
return; |
| 5378 |
case Intrinsic::aarch64_sve_zip_x4: |
5378 |
case Intrinsic::aarch64_sve_zip_x4: |
| 5379 |
if (auto Op = SelectOpcodeFromVT( |
5379 |
if (auto Op = SelectOpcodeFromVT( |
| 5380 |
Node->getValueType(0), |
5380 |
Node->getValueType(0), |
| 5381 |
{AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H, |
5381 |
{AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H, |
| 5382 |
AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D})) |
5382 |
AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D})) |
| 5383 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); |
5383 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); |
| 5384 |
return; |
5384 |
return; |
| 5385 |
case Intrinsic::aarch64_sve_zipq_x4: |
5385 |
case Intrinsic::aarch64_sve_zipq_x4: |
| 5386 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, |
5386 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, |
| 5387 |
AArch64::ZIP_VG4_4Z4Z_Q); |
5387 |
AArch64::ZIP_VG4_4Z4Z_Q); |
| 5388 |
return; |
5388 |
return; |
| 5389 |
case Intrinsic::aarch64_sve_uzp_x2: |
5389 |
case Intrinsic::aarch64_sve_uzp_x2: |
| 5390 |
if (auto Op = SelectOpcodeFromVT( |
5390 |
if (auto Op = SelectOpcodeFromVT( |
| 5391 |
Node->getValueType(0), |
5391 |
Node->getValueType(0), |
| 5392 |
{AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H, |
5392 |
{AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H, |
| 5393 |
AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D})) |
5393 |
AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D})) |
| 5394 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); |
5394 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); |
| 5395 |
return; |
5395 |
return; |
| 5396 |
case Intrinsic::aarch64_sve_uzpq_x2: |
5396 |
case Intrinsic::aarch64_sve_uzpq_x2: |
| 5397 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, |
5397 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, |
| 5398 |
AArch64::UZP_VG2_2ZZZ_Q); |
5398 |
AArch64::UZP_VG2_2ZZZ_Q); |
| 5399 |
return; |
5399 |
return; |
| 5400 |
case Intrinsic::aarch64_sve_uzp_x4: |
5400 |
case Intrinsic::aarch64_sve_uzp_x4: |
| 5401 |
if (auto Op = SelectOpcodeFromVT( |
5401 |
if (auto Op = SelectOpcodeFromVT( |
| 5402 |
Node->getValueType(0), |
5402 |
Node->getValueType(0), |
| 5403 |
{AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H, |
5403 |
{AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H, |
| 5404 |
AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D})) |
5404 |
AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D})) |
| 5405 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); |
5405 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); |
| 5406 |
return; |
5406 |
return; |
| 5407 |
case Intrinsic::aarch64_sve_uzpq_x4: |
5407 |
case Intrinsic::aarch64_sve_uzpq_x4: |
| 5408 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, |
5408 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, |
| 5409 |
AArch64::UZP_VG4_4Z4Z_Q); |
5409 |
AArch64::UZP_VG4_4Z4Z_Q); |
| 5410 |
return; |
5410 |
return; |
| 5411 |
case Intrinsic::aarch64_sve_sel_x2: |
5411 |
case Intrinsic::aarch64_sve_sel_x2: |
| 5412 |
if (auto Op = SelectOpcodeFromVT( |
5412 |
if (auto Op = SelectOpcodeFromVT( |
| 5413 |
Node->getValueType(0), |
5413 |
Node->getValueType(0), |
| 5414 |
{AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H, |
5414 |
{AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H, |
| 5415 |
AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D})) |
5415 |
AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D})) |
| 5416 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true); |
5416 |
SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true); |
| 5417 |
return; |
5417 |
return; |
| 5418 |
case Intrinsic::aarch64_sve_sel_x4: |
5418 |
case Intrinsic::aarch64_sve_sel_x4: |
| 5419 |
if (auto Op = SelectOpcodeFromVT( |
5419 |
if (auto Op = SelectOpcodeFromVT( |
| 5420 |
Node->getValueType(0), |
5420 |
Node->getValueType(0), |
| 5421 |
{AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H, |
5421 |
{AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H, |
| 5422 |
AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D})) |
5422 |
AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D})) |
| 5423 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true); |
5423 |
SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true); |
| 5424 |
return; |
5424 |
return; |
| 5425 |
case Intrinsic::aarch64_sve_frinta_x2: |
5425 |
case Intrinsic::aarch64_sve_frinta_x2: |
| 5426 |
SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S); |
5426 |
SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S); |
| 5427 |
return; |
5427 |
return; |
| 5428 |
case Intrinsic::aarch64_sve_frinta_x4: |
5428 |
case Intrinsic::aarch64_sve_frinta_x4: |
| 5429 |
SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S); |
5429 |
SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S); |
| 5430 |
return; |
5430 |
return; |
| 5431 |
case Intrinsic::aarch64_sve_frintm_x2: |
5431 |
case Intrinsic::aarch64_sve_frintm_x2: |
| 5432 |
SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S); |
5432 |
SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S); |
| 5433 |
return; |
5433 |
return; |
| 5434 |
case Intrinsic::aarch64_sve_frintm_x4: |
5434 |
case Intrinsic::aarch64_sve_frintm_x4: |
| 5435 |
SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S); |
5435 |
SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S); |
| 5436 |
return; |
5436 |
return; |
| 5437 |
case Intrinsic::aarch64_sve_frintn_x2: |
5437 |
case Intrinsic::aarch64_sve_frintn_x2: |
| 5438 |
SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S); |
5438 |
SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S); |
| 5439 |
return; |
5439 |
return; |
| 5440 |
case Intrinsic::aarch64_sve_frintn_x4: |
5440 |
case Intrinsic::aarch64_sve_frintn_x4: |
| 5441 |
SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S); |
5441 |
SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S); |
| 5442 |
return; |
5442 |
return; |
| 5443 |
case Intrinsic::aarch64_sve_frintp_x2: |
5443 |
case Intrinsic::aarch64_sve_frintp_x2: |
| 5444 |
SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S); |
5444 |
SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S); |
| 5445 |
return; |
5445 |
return; |
| 5446 |
case Intrinsic::aarch64_sve_frintp_x4: |
5446 |
case Intrinsic::aarch64_sve_frintp_x4: |
| 5447 |
SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S); |
5447 |
SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S); |
| 5448 |
return; |
5448 |
return; |
| 5449 |
case Intrinsic::aarch64_sve_sunpk_x2: |
5449 |
case Intrinsic::aarch64_sve_sunpk_x2: |
| 5450 |
if (auto Op = SelectOpcodeFromVT( |
5450 |
if (auto Op = SelectOpcodeFromVT( |
| 5451 |
Node->getValueType(0), |
5451 |
Node->getValueType(0), |
| 5452 |
{0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S, |
5452 |
{0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S, |
| 5453 |
AArch64::SUNPK_VG2_2ZZ_D})) |
5453 |
AArch64::SUNPK_VG2_2ZZ_D})) |
| 5454 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); |
5454 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); |
| 5455 |
return; |
5455 |
return; |
| 5456 |
case Intrinsic::aarch64_sve_uunpk_x2: |
5456 |
case Intrinsic::aarch64_sve_uunpk_x2: |
| 5457 |
if (auto Op = SelectOpcodeFromVT( |
5457 |
if (auto Op = SelectOpcodeFromVT( |
| 5458 |
Node->getValueType(0), |
5458 |
Node->getValueType(0), |
| 5459 |
{0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S, |
5459 |
{0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S, |
| 5460 |
AArch64::UUNPK_VG2_2ZZ_D})) |
5460 |
AArch64::UUNPK_VG2_2ZZ_D})) |
| 5461 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); |
5461 |
SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op); |
| 5462 |
return; |
5462 |
return; |
| 5463 |
case Intrinsic::aarch64_sve_sunpk_x4: |
5463 |
case Intrinsic::aarch64_sve_sunpk_x4: |
| 5464 |
if (auto Op = SelectOpcodeFromVT( |
5464 |
if (auto Op = SelectOpcodeFromVT( |
| 5465 |
Node->getValueType(0), |
5465 |
Node->getValueType(0), |
| 5466 |
{0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S, |
5466 |
{0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S, |
| 5467 |
AArch64::SUNPK_VG4_4Z2Z_D})) |
5467 |
AArch64::SUNPK_VG4_4Z2Z_D})) |
| 5468 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); |
5468 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); |
| 5469 |
return; |
5469 |
return; |
| 5470 |
case Intrinsic::aarch64_sve_uunpk_x4: |
5470 |
case Intrinsic::aarch64_sve_uunpk_x4: |
| 5471 |
if (auto Op = SelectOpcodeFromVT( |
5471 |
if (auto Op = SelectOpcodeFromVT( |
| 5472 |
Node->getValueType(0), |
5472 |
Node->getValueType(0), |
| 5473 |
{0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S, |
5473 |
{0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S, |
| 5474 |
AArch64::UUNPK_VG4_4Z2Z_D})) |
5474 |
AArch64::UUNPK_VG4_4Z2Z_D})) |
| 5475 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); |
5475 |
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op); |
| 5476 |
return; |
5476 |
return; |
| 5477 |
case Intrinsic::aarch64_sve_pext_x2: { |
5477 |
case Intrinsic::aarch64_sve_pext_x2: { |
| 5478 |
if (auto Op = SelectOpcodeFromVT( |
5478 |
if (auto Op = SelectOpcodeFromVT( |
| 5479 |
Node->getValueType(0), |
5479 |
Node->getValueType(0), |
| 5480 |
{AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S, |
5480 |
{AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S, |
| 5481 |
AArch64::PEXT_2PCI_D})) |
5481 |
AArch64::PEXT_2PCI_D})) |
| 5482 |
SelectPExtPair(Node, Op); |
5482 |
SelectPExtPair(Node, Op); |
| 5483 |
return; |
5483 |
return; |
| 5484 |
} |
5484 |
} |
| 5485 |
} |
5485 |
} |
| 5486 |
break; |
5486 |
break; |
| 5487 |
} |
5487 |
} |
| 5488 |
case ISD::INTRINSIC_VOID: { |
5488 |
case ISD::INTRINSIC_VOID: { |
| 5489 |
unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); |
5489 |
unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); |
| 5490 |
if (Node->getNumOperands() >= 3) |
5490 |
if (Node->getNumOperands() >= 3) |
| 5491 |
VT = Node->getOperand(2)->getValueType(0); |
5491 |
VT = Node->getOperand(2)->getValueType(0); |
| 5492 |
switch (IntNo) { |
5492 |
switch (IntNo) { |
| 5493 |
default: |
5493 |
default: |
| 5494 |
break; |
5494 |
break; |
| 5495 |
case Intrinsic::aarch64_neon_st1x2: { |
5495 |
case Intrinsic::aarch64_neon_st1x2: { |
| 5496 |
if (VT == MVT::v8i8) { |
5496 |
if (VT == MVT::v8i8) { |
| 5497 |
SelectStore(Node, 2, AArch64::ST1Twov8b); |
5497 |
SelectStore(Node, 2, AArch64::ST1Twov8b); |
| 5498 |
return; |
5498 |
return; |
| 5499 |
} else if (VT == MVT::v16i8) { |
5499 |
} else if (VT == MVT::v16i8) { |
| 5500 |
SelectStore(Node, 2, AArch64::ST1Twov16b); |
5500 |
SelectStore(Node, 2, AArch64::ST1Twov16b); |
| 5501 |
return; |
5501 |
return; |
| 5502 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
5502 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 5503 |
VT == MVT::v4bf16) { |
5503 |
VT == MVT::v4bf16) { |
| 5504 |
SelectStore(Node, 2, AArch64::ST1Twov4h); |
5504 |
SelectStore(Node, 2, AArch64::ST1Twov4h); |
| 5505 |
return; |
5505 |
return; |
| 5506 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
5506 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
| 5507 |
VT == MVT::v8bf16) { |
5507 |
VT == MVT::v8bf16) { |
| 5508 |
SelectStore(Node, 2, AArch64::ST1Twov8h); |
5508 |
SelectStore(Node, 2, AArch64::ST1Twov8h); |
| 5509 |
return; |
5509 |
return; |
| 5510 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5510 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5511 |
SelectStore(Node, 2, AArch64::ST1Twov2s); |
5511 |
SelectStore(Node, 2, AArch64::ST1Twov2s); |
| 5512 |
return; |
5512 |
return; |
| 5513 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5513 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5514 |
SelectStore(Node, 2, AArch64::ST1Twov4s); |
5514 |
SelectStore(Node, 2, AArch64::ST1Twov4s); |
| 5515 |
return; |
5515 |
return; |
| 5516 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5516 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5517 |
SelectStore(Node, 2, AArch64::ST1Twov2d); |
5517 |
SelectStore(Node, 2, AArch64::ST1Twov2d); |
| 5518 |
return; |
5518 |
return; |
| 5519 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5519 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5520 |
SelectStore(Node, 2, AArch64::ST1Twov1d); |
5520 |
SelectStore(Node, 2, AArch64::ST1Twov1d); |
| 5521 |
return; |
5521 |
return; |
| 5522 |
} |
5522 |
} |
| 5523 |
break; |
5523 |
break; |
| 5524 |
} |
5524 |
} |
| 5525 |
case Intrinsic::aarch64_neon_st1x3: { |
5525 |
case Intrinsic::aarch64_neon_st1x3: { |
| 5526 |
if (VT == MVT::v8i8) { |
5526 |
if (VT == MVT::v8i8) { |
| 5527 |
SelectStore(Node, 3, AArch64::ST1Threev8b); |
5527 |
SelectStore(Node, 3, AArch64::ST1Threev8b); |
| 5528 |
return; |
5528 |
return; |
| 5529 |
} else if (VT == MVT::v16i8) { |
5529 |
} else if (VT == MVT::v16i8) { |
| 5530 |
SelectStore(Node, 3, AArch64::ST1Threev16b); |
5530 |
SelectStore(Node, 3, AArch64::ST1Threev16b); |
| 5531 |
return; |
5531 |
return; |
| 5532 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
5532 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 5533 |
VT == MVT::v4bf16) { |
5533 |
VT == MVT::v4bf16) { |
| 5534 |
SelectStore(Node, 3, AArch64::ST1Threev4h); |
5534 |
SelectStore(Node, 3, AArch64::ST1Threev4h); |
| 5535 |
return; |
5535 |
return; |
| 5536 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
5536 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
| 5537 |
VT == MVT::v8bf16) { |
5537 |
VT == MVT::v8bf16) { |
| 5538 |
SelectStore(Node, 3, AArch64::ST1Threev8h); |
5538 |
SelectStore(Node, 3, AArch64::ST1Threev8h); |
| 5539 |
return; |
5539 |
return; |
| 5540 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5540 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5541 |
SelectStore(Node, 3, AArch64::ST1Threev2s); |
5541 |
SelectStore(Node, 3, AArch64::ST1Threev2s); |
| 5542 |
return; |
5542 |
return; |
| 5543 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5543 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5544 |
SelectStore(Node, 3, AArch64::ST1Threev4s); |
5544 |
SelectStore(Node, 3, AArch64::ST1Threev4s); |
| 5545 |
return; |
5545 |
return; |
| 5546 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5546 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5547 |
SelectStore(Node, 3, AArch64::ST1Threev2d); |
5547 |
SelectStore(Node, 3, AArch64::ST1Threev2d); |
| 5548 |
return; |
5548 |
return; |
| 5549 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5549 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5550 |
SelectStore(Node, 3, AArch64::ST1Threev1d); |
5550 |
SelectStore(Node, 3, AArch64::ST1Threev1d); |
| 5551 |
return; |
5551 |
return; |
| 5552 |
} |
5552 |
} |
| 5553 |
break; |
5553 |
break; |
| 5554 |
} |
5554 |
} |
| 5555 |
case Intrinsic::aarch64_neon_st1x4: { |
5555 |
case Intrinsic::aarch64_neon_st1x4: { |
| 5556 |
if (VT == MVT::v8i8) { |
5556 |
if (VT == MVT::v8i8) { |
| 5557 |
SelectStore(Node, 4, AArch64::ST1Fourv8b); |
5557 |
SelectStore(Node, 4, AArch64::ST1Fourv8b); |
| 5558 |
return; |
5558 |
return; |
| 5559 |
} else if (VT == MVT::v16i8) { |
5559 |
} else if (VT == MVT::v16i8) { |
| 5560 |
SelectStore(Node, 4, AArch64::ST1Fourv16b); |
5560 |
SelectStore(Node, 4, AArch64::ST1Fourv16b); |
| 5561 |
return; |
5561 |
return; |
| 5562 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
5562 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 5563 |
VT == MVT::v4bf16) { |
5563 |
VT == MVT::v4bf16) { |
| 5564 |
SelectStore(Node, 4, AArch64::ST1Fourv4h); |
5564 |
SelectStore(Node, 4, AArch64::ST1Fourv4h); |
| 5565 |
return; |
5565 |
return; |
| 5566 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
5566 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
| 5567 |
VT == MVT::v8bf16) { |
5567 |
VT == MVT::v8bf16) { |
| 5568 |
SelectStore(Node, 4, AArch64::ST1Fourv8h); |
5568 |
SelectStore(Node, 4, AArch64::ST1Fourv8h); |
| 5569 |
return; |
5569 |
return; |
| 5570 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5570 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5571 |
SelectStore(Node, 4, AArch64::ST1Fourv2s); |
5571 |
SelectStore(Node, 4, AArch64::ST1Fourv2s); |
| 5572 |
return; |
5572 |
return; |
| 5573 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5573 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5574 |
SelectStore(Node, 4, AArch64::ST1Fourv4s); |
5574 |
SelectStore(Node, 4, AArch64::ST1Fourv4s); |
| 5575 |
return; |
5575 |
return; |
| 5576 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5576 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5577 |
SelectStore(Node, 4, AArch64::ST1Fourv2d); |
5577 |
SelectStore(Node, 4, AArch64::ST1Fourv2d); |
| 5578 |
return; |
5578 |
return; |
| 5579 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5579 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5580 |
SelectStore(Node, 4, AArch64::ST1Fourv1d); |
5580 |
SelectStore(Node, 4, AArch64::ST1Fourv1d); |
| 5581 |
return; |
5581 |
return; |
| 5582 |
} |
5582 |
} |
| 5583 |
break; |
5583 |
break; |
| 5584 |
} |
5584 |
} |
| 5585 |
case Intrinsic::aarch64_neon_st2: { |
5585 |
case Intrinsic::aarch64_neon_st2: { |
| 5586 |
if (VT == MVT::v8i8) { |
5586 |
if (VT == MVT::v8i8) { |
| 5587 |
SelectStore(Node, 2, AArch64::ST2Twov8b); |
5587 |
SelectStore(Node, 2, AArch64::ST2Twov8b); |
| 5588 |
return; |
5588 |
return; |
| 5589 |
} else if (VT == MVT::v16i8) { |
5589 |
} else if (VT == MVT::v16i8) { |
| 5590 |
SelectStore(Node, 2, AArch64::ST2Twov16b); |
5590 |
SelectStore(Node, 2, AArch64::ST2Twov16b); |
| 5591 |
return; |
5591 |
return; |
| 5592 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
5592 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 5593 |
VT == MVT::v4bf16) { |
5593 |
VT == MVT::v4bf16) { |
| 5594 |
SelectStore(Node, 2, AArch64::ST2Twov4h); |
5594 |
SelectStore(Node, 2, AArch64::ST2Twov4h); |
| 5595 |
return; |
5595 |
return; |
| 5596 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
5596 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
| 5597 |
VT == MVT::v8bf16) { |
5597 |
VT == MVT::v8bf16) { |
| 5598 |
SelectStore(Node, 2, AArch64::ST2Twov8h); |
5598 |
SelectStore(Node, 2, AArch64::ST2Twov8h); |
| 5599 |
return; |
5599 |
return; |
| 5600 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5600 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5601 |
SelectStore(Node, 2, AArch64::ST2Twov2s); |
5601 |
SelectStore(Node, 2, AArch64::ST2Twov2s); |
| 5602 |
return; |
5602 |
return; |
| 5603 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5603 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5604 |
SelectStore(Node, 2, AArch64::ST2Twov4s); |
5604 |
SelectStore(Node, 2, AArch64::ST2Twov4s); |
| 5605 |
return; |
5605 |
return; |
| 5606 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5606 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5607 |
SelectStore(Node, 2, AArch64::ST2Twov2d); |
5607 |
SelectStore(Node, 2, AArch64::ST2Twov2d); |
| 5608 |
return; |
5608 |
return; |
| 5609 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5609 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5610 |
SelectStore(Node, 2, AArch64::ST1Twov1d); |
5610 |
SelectStore(Node, 2, AArch64::ST1Twov1d); |
| 5611 |
return; |
5611 |
return; |
| 5612 |
} |
5612 |
} |
| 5613 |
break; |
5613 |
break; |
| 5614 |
} |
5614 |
} |
| 5615 |
case Intrinsic::aarch64_neon_st3: { |
5615 |
case Intrinsic::aarch64_neon_st3: { |
| 5616 |
if (VT == MVT::v8i8) { |
5616 |
if (VT == MVT::v8i8) { |
| 5617 |
SelectStore(Node, 3, AArch64::ST3Threev8b); |
5617 |
SelectStore(Node, 3, AArch64::ST3Threev8b); |
| 5618 |
return; |
5618 |
return; |
| 5619 |
} else if (VT == MVT::v16i8) { |
5619 |
} else if (VT == MVT::v16i8) { |
| 5620 |
SelectStore(Node, 3, AArch64::ST3Threev16b); |
5620 |
SelectStore(Node, 3, AArch64::ST3Threev16b); |
| 5621 |
return; |
5621 |
return; |
| 5622 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
5622 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 5623 |
VT == MVT::v4bf16) { |
5623 |
VT == MVT::v4bf16) { |
| 5624 |
SelectStore(Node, 3, AArch64::ST3Threev4h); |
5624 |
SelectStore(Node, 3, AArch64::ST3Threev4h); |
| 5625 |
return; |
5625 |
return; |
| 5626 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
5626 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
| 5627 |
VT == MVT::v8bf16) { |
5627 |
VT == MVT::v8bf16) { |
| 5628 |
SelectStore(Node, 3, AArch64::ST3Threev8h); |
5628 |
SelectStore(Node, 3, AArch64::ST3Threev8h); |
| 5629 |
return; |
5629 |
return; |
| 5630 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5630 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5631 |
SelectStore(Node, 3, AArch64::ST3Threev2s); |
5631 |
SelectStore(Node, 3, AArch64::ST3Threev2s); |
| 5632 |
return; |
5632 |
return; |
| 5633 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5633 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5634 |
SelectStore(Node, 3, AArch64::ST3Threev4s); |
5634 |
SelectStore(Node, 3, AArch64::ST3Threev4s); |
| 5635 |
return; |
5635 |
return; |
| 5636 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5636 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5637 |
SelectStore(Node, 3, AArch64::ST3Threev2d); |
5637 |
SelectStore(Node, 3, AArch64::ST3Threev2d); |
| 5638 |
return; |
5638 |
return; |
| 5639 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5639 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5640 |
SelectStore(Node, 3, AArch64::ST1Threev1d); |
5640 |
SelectStore(Node, 3, AArch64::ST1Threev1d); |
| 5641 |
return; |
5641 |
return; |
| 5642 |
} |
5642 |
} |
| 5643 |
break; |
5643 |
break; |
| 5644 |
} |
5644 |
} |
| 5645 |
case Intrinsic::aarch64_neon_st4: { |
5645 |
case Intrinsic::aarch64_neon_st4: { |
| 5646 |
if (VT == MVT::v8i8) { |
5646 |
if (VT == MVT::v8i8) { |
| 5647 |
SelectStore(Node, 4, AArch64::ST4Fourv8b); |
5647 |
SelectStore(Node, 4, AArch64::ST4Fourv8b); |
| 5648 |
return; |
5648 |
return; |
| 5649 |
} else if (VT == MVT::v16i8) { |
5649 |
} else if (VT == MVT::v16i8) { |
| 5650 |
SelectStore(Node, 4, AArch64::ST4Fourv16b); |
5650 |
SelectStore(Node, 4, AArch64::ST4Fourv16b); |
| 5651 |
return; |
5651 |
return; |
| 5652 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
5652 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 5653 |
VT == MVT::v4bf16) { |
5653 |
VT == MVT::v4bf16) { |
| 5654 |
SelectStore(Node, 4, AArch64::ST4Fourv4h); |
5654 |
SelectStore(Node, 4, AArch64::ST4Fourv4h); |
| 5655 |
return; |
5655 |
return; |
| 5656 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
5656 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || |
| 5657 |
VT == MVT::v8bf16) { |
5657 |
VT == MVT::v8bf16) { |
| 5658 |
SelectStore(Node, 4, AArch64::ST4Fourv8h); |
5658 |
SelectStore(Node, 4, AArch64::ST4Fourv8h); |
| 5659 |
return; |
5659 |
return; |
| 5660 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5660 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5661 |
SelectStore(Node, 4, AArch64::ST4Fourv2s); |
5661 |
SelectStore(Node, 4, AArch64::ST4Fourv2s); |
| 5662 |
return; |
5662 |
return; |
| 5663 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5663 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5664 |
SelectStore(Node, 4, AArch64::ST4Fourv4s); |
5664 |
SelectStore(Node, 4, AArch64::ST4Fourv4s); |
| 5665 |
return; |
5665 |
return; |
| 5666 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5666 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5667 |
SelectStore(Node, 4, AArch64::ST4Fourv2d); |
5667 |
SelectStore(Node, 4, AArch64::ST4Fourv2d); |
| 5668 |
return; |
5668 |
return; |
| 5669 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5669 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5670 |
SelectStore(Node, 4, AArch64::ST1Fourv1d); |
5670 |
SelectStore(Node, 4, AArch64::ST1Fourv1d); |
| 5671 |
return; |
5671 |
return; |
| 5672 |
} |
5672 |
} |
| 5673 |
break; |
5673 |
break; |
| 5674 |
} |
5674 |
} |
| 5675 |
case Intrinsic::aarch64_neon_st2lane: { |
5675 |
case Intrinsic::aarch64_neon_st2lane: { |
| 5676 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
5676 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 5677 |
SelectStoreLane(Node, 2, AArch64::ST2i8); |
5677 |
SelectStoreLane(Node, 2, AArch64::ST2i8); |
| 5678 |
return; |
5678 |
return; |
| 5679 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
5679 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 5680 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
5680 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 5681 |
SelectStoreLane(Node, 2, AArch64::ST2i16); |
5681 |
SelectStoreLane(Node, 2, AArch64::ST2i16); |
| 5682 |
return; |
5682 |
return; |
| 5683 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
5683 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 5684 |
VT == MVT::v2f32) { |
5684 |
VT == MVT::v2f32) { |
| 5685 |
SelectStoreLane(Node, 2, AArch64::ST2i32); |
5685 |
SelectStoreLane(Node, 2, AArch64::ST2i32); |
| 5686 |
return; |
5686 |
return; |
| 5687 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
5687 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 5688 |
VT == MVT::v1f64) { |
5688 |
VT == MVT::v1f64) { |
| 5689 |
SelectStoreLane(Node, 2, AArch64::ST2i64); |
5689 |
SelectStoreLane(Node, 2, AArch64::ST2i64); |
| 5690 |
return; |
5690 |
return; |
| 5691 |
} |
5691 |
} |
| 5692 |
break; |
5692 |
break; |
| 5693 |
} |
5693 |
} |
| 5694 |
case Intrinsic::aarch64_neon_st3lane: { |
5694 |
case Intrinsic::aarch64_neon_st3lane: { |
| 5695 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
5695 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 5696 |
SelectStoreLane(Node, 3, AArch64::ST3i8); |
5696 |
SelectStoreLane(Node, 3, AArch64::ST3i8); |
| 5697 |
return; |
5697 |
return; |
| 5698 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
5698 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 5699 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
5699 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 5700 |
SelectStoreLane(Node, 3, AArch64::ST3i16); |
5700 |
SelectStoreLane(Node, 3, AArch64::ST3i16); |
| 5701 |
return; |
5701 |
return; |
| 5702 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
5702 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 5703 |
VT == MVT::v2f32) { |
5703 |
VT == MVT::v2f32) { |
| 5704 |
SelectStoreLane(Node, 3, AArch64::ST3i32); |
5704 |
SelectStoreLane(Node, 3, AArch64::ST3i32); |
| 5705 |
return; |
5705 |
return; |
| 5706 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
5706 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 5707 |
VT == MVT::v1f64) { |
5707 |
VT == MVT::v1f64) { |
| 5708 |
SelectStoreLane(Node, 3, AArch64::ST3i64); |
5708 |
SelectStoreLane(Node, 3, AArch64::ST3i64); |
| 5709 |
return; |
5709 |
return; |
| 5710 |
} |
5710 |
} |
| 5711 |
break; |
5711 |
break; |
| 5712 |
} |
5712 |
} |
| 5713 |
case Intrinsic::aarch64_neon_st4lane: { |
5713 |
case Intrinsic::aarch64_neon_st4lane: { |
| 5714 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
5714 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 5715 |
SelectStoreLane(Node, 4, AArch64::ST4i8); |
5715 |
SelectStoreLane(Node, 4, AArch64::ST4i8); |
| 5716 |
return; |
5716 |
return; |
| 5717 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
5717 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 5718 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
5718 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 5719 |
SelectStoreLane(Node, 4, AArch64::ST4i16); |
5719 |
SelectStoreLane(Node, 4, AArch64::ST4i16); |
| 5720 |
return; |
5720 |
return; |
| 5721 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
5721 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 5722 |
VT == MVT::v2f32) { |
5722 |
VT == MVT::v2f32) { |
| 5723 |
SelectStoreLane(Node, 4, AArch64::ST4i32); |
5723 |
SelectStoreLane(Node, 4, AArch64::ST4i32); |
| 5724 |
return; |
5724 |
return; |
| 5725 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
5725 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 5726 |
VT == MVT::v1f64) { |
5726 |
VT == MVT::v1f64) { |
| 5727 |
SelectStoreLane(Node, 4, AArch64::ST4i64); |
5727 |
SelectStoreLane(Node, 4, AArch64::ST4i64); |
| 5728 |
return; |
5728 |
return; |
| 5729 |
} |
5729 |
} |
| 5730 |
break; |
5730 |
break; |
| 5731 |
} |
5731 |
} |
| 5732 |
case Intrinsic::aarch64_sve_st2: { |
5732 |
case Intrinsic::aarch64_sve_st2: { |
| 5733 |
if (VT == MVT::nxv16i8) { |
5733 |
if (VT == MVT::nxv16i8) { |
| 5734 |
SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); |
5734 |
SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); |
| 5735 |
return; |
5735 |
return; |
| 5736 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
5736 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 5737 |
VT == MVT::nxv8bf16) { |
5737 |
VT == MVT::nxv8bf16) { |
| 5738 |
SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); |
5738 |
SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); |
| 5739 |
return; |
5739 |
return; |
| 5740 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
5740 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 5741 |
SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); |
5741 |
SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); |
| 5742 |
return; |
5742 |
return; |
| 5743 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
5743 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 5744 |
SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); |
5744 |
SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); |
| 5745 |
return; |
5745 |
return; |
| 5746 |
} |
5746 |
} |
| 5747 |
break; |
5747 |
break; |
| 5748 |
} |
5748 |
} |
| 5749 |
case Intrinsic::aarch64_sve_st3: { |
5749 |
case Intrinsic::aarch64_sve_st3: { |
| 5750 |
if (VT == MVT::nxv16i8) { |
5750 |
if (VT == MVT::nxv16i8) { |
| 5751 |
SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); |
5751 |
SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); |
| 5752 |
return; |
5752 |
return; |
| 5753 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
5753 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 5754 |
VT == MVT::nxv8bf16) { |
5754 |
VT == MVT::nxv8bf16) { |
| 5755 |
SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); |
5755 |
SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); |
| 5756 |
return; |
5756 |
return; |
| 5757 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
5757 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 5758 |
SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); |
5758 |
SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); |
| 5759 |
return; |
5759 |
return; |
| 5760 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
5760 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 5761 |
SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); |
5761 |
SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); |
| 5762 |
return; |
5762 |
return; |
| 5763 |
} |
5763 |
} |
| 5764 |
break; |
5764 |
break; |
| 5765 |
} |
5765 |
} |
| 5766 |
case Intrinsic::aarch64_sve_st4: { |
5766 |
case Intrinsic::aarch64_sve_st4: { |
| 5767 |
if (VT == MVT::nxv16i8) { |
5767 |
if (VT == MVT::nxv16i8) { |
| 5768 |
SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); |
5768 |
SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); |
| 5769 |
return; |
5769 |
return; |
| 5770 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
5770 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 5771 |
VT == MVT::nxv8bf16) { |
5771 |
VT == MVT::nxv8bf16) { |
| 5772 |
SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); |
5772 |
SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); |
| 5773 |
return; |
5773 |
return; |
| 5774 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
5774 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 5775 |
SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); |
5775 |
SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); |
| 5776 |
return; |
5776 |
return; |
| 5777 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
5777 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 5778 |
SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM); |
5778 |
SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM); |
| 5779 |
return; |
5779 |
return; |
| 5780 |
} |
5780 |
} |
| 5781 |
break; |
5781 |
break; |
| 5782 |
} |
5782 |
} |
| 5783 |
} |
5783 |
} |
| 5784 |
break; |
5784 |
break; |
| 5785 |
} |
5785 |
} |
| 5786 |
case AArch64ISD::LD2post: { |
5786 |
case AArch64ISD::LD2post: { |
| 5787 |
if (VT == MVT::v8i8) { |
5787 |
if (VT == MVT::v8i8) { |
| 5788 |
SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); |
5788 |
SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); |
| 5789 |
return; |
5789 |
return; |
| 5790 |
} else if (VT == MVT::v16i8) { |
5790 |
} else if (VT == MVT::v16i8) { |
| 5791 |
SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); |
5791 |
SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); |
| 5792 |
return; |
5792 |
return; |
| 5793 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
5793 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 5794 |
SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); |
5794 |
SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); |
| 5795 |
return; |
5795 |
return; |
| 5796 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
5796 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 5797 |
SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); |
5797 |
SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); |
| 5798 |
return; |
5798 |
return; |
| 5799 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5799 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5800 |
SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); |
5800 |
SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); |
| 5801 |
return; |
5801 |
return; |
| 5802 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5802 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5803 |
SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); |
5803 |
SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); |
| 5804 |
return; |
5804 |
return; |
| 5805 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5805 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5806 |
SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); |
5806 |
SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); |
| 5807 |
return; |
5807 |
return; |
| 5808 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5808 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5809 |
SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); |
5809 |
SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); |
| 5810 |
return; |
5810 |
return; |
| 5811 |
} |
5811 |
} |
| 5812 |
break; |
5812 |
break; |
| 5813 |
} |
5813 |
} |
| 5814 |
case AArch64ISD::LD3post: { |
5814 |
case AArch64ISD::LD3post: { |
| 5815 |
if (VT == MVT::v8i8) { |
5815 |
if (VT == MVT::v8i8) { |
| 5816 |
SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); |
5816 |
SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); |
| 5817 |
return; |
5817 |
return; |
| 5818 |
} else if (VT == MVT::v16i8) { |
5818 |
} else if (VT == MVT::v16i8) { |
| 5819 |
SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); |
5819 |
SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); |
| 5820 |
return; |
5820 |
return; |
| 5821 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
5821 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 5822 |
SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); |
5822 |
SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); |
| 5823 |
return; |
5823 |
return; |
| 5824 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
5824 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 5825 |
SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); |
5825 |
SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); |
| 5826 |
return; |
5826 |
return; |
| 5827 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5827 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5828 |
SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); |
5828 |
SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); |
| 5829 |
return; |
5829 |
return; |
| 5830 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5830 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5831 |
SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); |
5831 |
SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); |
| 5832 |
return; |
5832 |
return; |
| 5833 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5833 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5834 |
SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); |
5834 |
SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); |
| 5835 |
return; |
5835 |
return; |
| 5836 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5836 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5837 |
SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); |
5837 |
SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); |
| 5838 |
return; |
5838 |
return; |
| 5839 |
} |
5839 |
} |
| 5840 |
break; |
5840 |
break; |
| 5841 |
} |
5841 |
} |
| 5842 |
case AArch64ISD::LD4post: { |
5842 |
case AArch64ISD::LD4post: { |
| 5843 |
if (VT == MVT::v8i8) { |
5843 |
if (VT == MVT::v8i8) { |
| 5844 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); |
5844 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); |
| 5845 |
return; |
5845 |
return; |
| 5846 |
} else if (VT == MVT::v16i8) { |
5846 |
} else if (VT == MVT::v16i8) { |
| 5847 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); |
5847 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); |
| 5848 |
return; |
5848 |
return; |
| 5849 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
5849 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 5850 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); |
5850 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); |
| 5851 |
return; |
5851 |
return; |
| 5852 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
5852 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 5853 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); |
5853 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); |
| 5854 |
return; |
5854 |
return; |
| 5855 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5855 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5856 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); |
5856 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); |
| 5857 |
return; |
5857 |
return; |
| 5858 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5858 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5859 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); |
5859 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); |
| 5860 |
return; |
5860 |
return; |
| 5861 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5861 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5862 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); |
5862 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); |
| 5863 |
return; |
5863 |
return; |
| 5864 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5864 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5865 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); |
5865 |
SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); |
| 5866 |
return; |
5866 |
return; |
| 5867 |
} |
5867 |
} |
| 5868 |
break; |
5868 |
break; |
| 5869 |
} |
5869 |
} |
| 5870 |
case AArch64ISD::LD1x2post: { |
5870 |
case AArch64ISD::LD1x2post: { |
| 5871 |
if (VT == MVT::v8i8) { |
5871 |
if (VT == MVT::v8i8) { |
| 5872 |
SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); |
5872 |
SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); |
| 5873 |
return; |
5873 |
return; |
| 5874 |
} else if (VT == MVT::v16i8) { |
5874 |
} else if (VT == MVT::v16i8) { |
| 5875 |
SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); |
5875 |
SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); |
| 5876 |
return; |
5876 |
return; |
| 5877 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
5877 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 5878 |
SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); |
5878 |
SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); |
| 5879 |
return; |
5879 |
return; |
| 5880 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
5880 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 5881 |
SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); |
5881 |
SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); |
| 5882 |
return; |
5882 |
return; |
| 5883 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5883 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5884 |
SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); |
5884 |
SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); |
| 5885 |
return; |
5885 |
return; |
| 5886 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5886 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5887 |
SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); |
5887 |
SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); |
| 5888 |
return; |
5888 |
return; |
| 5889 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5889 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5890 |
SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); |
5890 |
SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); |
| 5891 |
return; |
5891 |
return; |
| 5892 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5892 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5893 |
SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); |
5893 |
SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); |
| 5894 |
return; |
5894 |
return; |
| 5895 |
} |
5895 |
} |
| 5896 |
break; |
5896 |
break; |
| 5897 |
} |
5897 |
} |
| 5898 |
case AArch64ISD::LD1x3post: { |
5898 |
case AArch64ISD::LD1x3post: { |
| 5899 |
if (VT == MVT::v8i8) { |
5899 |
if (VT == MVT::v8i8) { |
| 5900 |
SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); |
5900 |
SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); |
| 5901 |
return; |
5901 |
return; |
| 5902 |
} else if (VT == MVT::v16i8) { |
5902 |
} else if (VT == MVT::v16i8) { |
| 5903 |
SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); |
5903 |
SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); |
| 5904 |
return; |
5904 |
return; |
| 5905 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
5905 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 5906 |
SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); |
5906 |
SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); |
| 5907 |
return; |
5907 |
return; |
| 5908 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
5908 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 5909 |
SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); |
5909 |
SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); |
| 5910 |
return; |
5910 |
return; |
| 5911 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5911 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5912 |
SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); |
5912 |
SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); |
| 5913 |
return; |
5913 |
return; |
| 5914 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5914 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5915 |
SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); |
5915 |
SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); |
| 5916 |
return; |
5916 |
return; |
| 5917 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5917 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5918 |
SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); |
5918 |
SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); |
| 5919 |
return; |
5919 |
return; |
| 5920 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5920 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5921 |
SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); |
5921 |
SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); |
| 5922 |
return; |
5922 |
return; |
| 5923 |
} |
5923 |
} |
| 5924 |
break; |
5924 |
break; |
| 5925 |
} |
5925 |
} |
| 5926 |
case AArch64ISD::LD1x4post: { |
5926 |
case AArch64ISD::LD1x4post: { |
| 5927 |
if (VT == MVT::v8i8) { |
5927 |
if (VT == MVT::v8i8) { |
| 5928 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); |
5928 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); |
| 5929 |
return; |
5929 |
return; |
| 5930 |
} else if (VT == MVT::v16i8) { |
5930 |
} else if (VT == MVT::v16i8) { |
| 5931 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); |
5931 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); |
| 5932 |
return; |
5932 |
return; |
| 5933 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
5933 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 5934 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); |
5934 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); |
| 5935 |
return; |
5935 |
return; |
| 5936 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
5936 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 5937 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); |
5937 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); |
| 5938 |
return; |
5938 |
return; |
| 5939 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5939 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5940 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); |
5940 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); |
| 5941 |
return; |
5941 |
return; |
| 5942 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5942 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5943 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); |
5943 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); |
| 5944 |
return; |
5944 |
return; |
| 5945 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5945 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5946 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); |
5946 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); |
| 5947 |
return; |
5947 |
return; |
| 5948 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5948 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5949 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); |
5949 |
SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); |
| 5950 |
return; |
5950 |
return; |
| 5951 |
} |
5951 |
} |
| 5952 |
break; |
5952 |
break; |
| 5953 |
} |
5953 |
} |
| 5954 |
case AArch64ISD::LD1DUPpost: { |
5954 |
case AArch64ISD::LD1DUPpost: { |
| 5955 |
if (VT == MVT::v8i8) { |
5955 |
if (VT == MVT::v8i8) { |
| 5956 |
SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); |
5956 |
SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); |
| 5957 |
return; |
5957 |
return; |
| 5958 |
} else if (VT == MVT::v16i8) { |
5958 |
} else if (VT == MVT::v16i8) { |
| 5959 |
SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); |
5959 |
SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); |
| 5960 |
return; |
5960 |
return; |
| 5961 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
5961 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 5962 |
SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); |
5962 |
SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); |
| 5963 |
return; |
5963 |
return; |
| 5964 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
5964 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 5965 |
SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); |
5965 |
SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); |
| 5966 |
return; |
5966 |
return; |
| 5967 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5967 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5968 |
SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); |
5968 |
SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); |
| 5969 |
return; |
5969 |
return; |
| 5970 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5970 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5971 |
SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); |
5971 |
SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); |
| 5972 |
return; |
5972 |
return; |
| 5973 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
5973 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 5974 |
SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); |
5974 |
SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); |
| 5975 |
return; |
5975 |
return; |
| 5976 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
5976 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 5977 |
SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); |
5977 |
SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); |
| 5978 |
return; |
5978 |
return; |
| 5979 |
} |
5979 |
} |
| 5980 |
break; |
5980 |
break; |
| 5981 |
} |
5981 |
} |
| 5982 |
case AArch64ISD::LD2DUPpost: { |
5982 |
case AArch64ISD::LD2DUPpost: { |
| 5983 |
if (VT == MVT::v8i8) { |
5983 |
if (VT == MVT::v8i8) { |
| 5984 |
SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); |
5984 |
SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); |
| 5985 |
return; |
5985 |
return; |
| 5986 |
} else if (VT == MVT::v16i8) { |
5986 |
} else if (VT == MVT::v16i8) { |
| 5987 |
SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); |
5987 |
SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); |
| 5988 |
return; |
5988 |
return; |
| 5989 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
5989 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 5990 |
SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); |
5990 |
SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); |
| 5991 |
return; |
5991 |
return; |
| 5992 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
5992 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 5993 |
SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); |
5993 |
SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); |
| 5994 |
return; |
5994 |
return; |
| 5995 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
5995 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 5996 |
SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); |
5996 |
SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); |
| 5997 |
return; |
5997 |
return; |
| 5998 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
5998 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 5999 |
SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); |
5999 |
SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); |
| 6000 |
return; |
6000 |
return; |
| 6001 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
6001 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 6002 |
SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); |
6002 |
SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); |
| 6003 |
return; |
6003 |
return; |
| 6004 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
6004 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 6005 |
SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); |
6005 |
SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); |
| 6006 |
return; |
6006 |
return; |
| 6007 |
} |
6007 |
} |
| 6008 |
break; |
6008 |
break; |
| 6009 |
} |
6009 |
} |
| 6010 |
case AArch64ISD::LD3DUPpost: { |
6010 |
case AArch64ISD::LD3DUPpost: { |
| 6011 |
if (VT == MVT::v8i8) { |
6011 |
if (VT == MVT::v8i8) { |
| 6012 |
SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); |
6012 |
SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); |
| 6013 |
return; |
6013 |
return; |
| 6014 |
} else if (VT == MVT::v16i8) { |
6014 |
} else if (VT == MVT::v16i8) { |
| 6015 |
SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); |
6015 |
SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); |
| 6016 |
return; |
6016 |
return; |
| 6017 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
6017 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 6018 |
SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); |
6018 |
SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); |
| 6019 |
return; |
6019 |
return; |
| 6020 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
6020 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 6021 |
SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); |
6021 |
SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); |
| 6022 |
return; |
6022 |
return; |
| 6023 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
6023 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 6024 |
SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); |
6024 |
SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); |
| 6025 |
return; |
6025 |
return; |
| 6026 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
6026 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 6027 |
SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); |
6027 |
SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); |
| 6028 |
return; |
6028 |
return; |
| 6029 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
6029 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 6030 |
SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); |
6030 |
SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); |
| 6031 |
return; |
6031 |
return; |
| 6032 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
6032 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 6033 |
SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); |
6033 |
SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); |
| 6034 |
return; |
6034 |
return; |
| 6035 |
} |
6035 |
} |
| 6036 |
break; |
6036 |
break; |
| 6037 |
} |
6037 |
} |
| 6038 |
case AArch64ISD::LD4DUPpost: { |
6038 |
case AArch64ISD::LD4DUPpost: { |
| 6039 |
if (VT == MVT::v8i8) { |
6039 |
if (VT == MVT::v8i8) { |
| 6040 |
SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); |
6040 |
SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); |
| 6041 |
return; |
6041 |
return; |
| 6042 |
} else if (VT == MVT::v16i8) { |
6042 |
} else if (VT == MVT::v16i8) { |
| 6043 |
SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); |
6043 |
SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); |
| 6044 |
return; |
6044 |
return; |
| 6045 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
6045 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 6046 |
SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); |
6046 |
SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); |
| 6047 |
return; |
6047 |
return; |
| 6048 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
6048 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 6049 |
SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); |
6049 |
SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); |
| 6050 |
return; |
6050 |
return; |
| 6051 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
6051 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 6052 |
SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); |
6052 |
SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); |
| 6053 |
return; |
6053 |
return; |
| 6054 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
6054 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 6055 |
SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); |
6055 |
SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); |
| 6056 |
return; |
6056 |
return; |
| 6057 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
6057 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 6058 |
SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); |
6058 |
SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); |
| 6059 |
return; |
6059 |
return; |
| 6060 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
6060 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 6061 |
SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); |
6061 |
SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); |
| 6062 |
return; |
6062 |
return; |
| 6063 |
} |
6063 |
} |
| 6064 |
break; |
6064 |
break; |
| 6065 |
} |
6065 |
} |
| 6066 |
case AArch64ISD::LD1LANEpost: { |
6066 |
case AArch64ISD::LD1LANEpost: { |
| 6067 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
6067 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 6068 |
SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); |
6068 |
SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); |
| 6069 |
return; |
6069 |
return; |
| 6070 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
6070 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 6071 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
6071 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 6072 |
SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); |
6072 |
SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); |
| 6073 |
return; |
6073 |
return; |
| 6074 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
6074 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 6075 |
VT == MVT::v2f32) { |
6075 |
VT == MVT::v2f32) { |
| 6076 |
SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); |
6076 |
SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); |
| 6077 |
return; |
6077 |
return; |
| 6078 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
6078 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 6079 |
VT == MVT::v1f64) { |
6079 |
VT == MVT::v1f64) { |
| 6080 |
SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); |
6080 |
SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); |
| 6081 |
return; |
6081 |
return; |
| 6082 |
} |
6082 |
} |
| 6083 |
break; |
6083 |
break; |
| 6084 |
} |
6084 |
} |
| 6085 |
case AArch64ISD::LD2LANEpost: { |
6085 |
case AArch64ISD::LD2LANEpost: { |
| 6086 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
6086 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 6087 |
SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); |
6087 |
SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); |
| 6088 |
return; |
6088 |
return; |
| 6089 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
6089 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 6090 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
6090 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 6091 |
SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); |
6091 |
SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); |
| 6092 |
return; |
6092 |
return; |
| 6093 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
6093 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 6094 |
VT == MVT::v2f32) { |
6094 |
VT == MVT::v2f32) { |
| 6095 |
SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); |
6095 |
SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); |
| 6096 |
return; |
6096 |
return; |
| 6097 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
6097 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 6098 |
VT == MVT::v1f64) { |
6098 |
VT == MVT::v1f64) { |
| 6099 |
SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); |
6099 |
SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); |
| 6100 |
return; |
6100 |
return; |
| 6101 |
} |
6101 |
} |
| 6102 |
break; |
6102 |
break; |
| 6103 |
} |
6103 |
} |
| 6104 |
case AArch64ISD::LD3LANEpost: { |
6104 |
case AArch64ISD::LD3LANEpost: { |
| 6105 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
6105 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 6106 |
SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); |
6106 |
SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); |
| 6107 |
return; |
6107 |
return; |
| 6108 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
6108 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 6109 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
6109 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 6110 |
SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); |
6110 |
SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); |
| 6111 |
return; |
6111 |
return; |
| 6112 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
6112 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 6113 |
VT == MVT::v2f32) { |
6113 |
VT == MVT::v2f32) { |
| 6114 |
SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); |
6114 |
SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); |
| 6115 |
return; |
6115 |
return; |
| 6116 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
6116 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 6117 |
VT == MVT::v1f64) { |
6117 |
VT == MVT::v1f64) { |
| 6118 |
SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); |
6118 |
SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); |
| 6119 |
return; |
6119 |
return; |
| 6120 |
} |
6120 |
} |
| 6121 |
break; |
6121 |
break; |
| 6122 |
} |
6122 |
} |
| 6123 |
case AArch64ISD::LD4LANEpost: { |
6123 |
case AArch64ISD::LD4LANEpost: { |
| 6124 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
6124 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 6125 |
SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); |
6125 |
SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); |
| 6126 |
return; |
6126 |
return; |
| 6127 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
6127 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 6128 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
6128 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 6129 |
SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); |
6129 |
SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); |
| 6130 |
return; |
6130 |
return; |
| 6131 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
6131 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 6132 |
VT == MVT::v2f32) { |
6132 |
VT == MVT::v2f32) { |
| 6133 |
SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); |
6133 |
SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); |
| 6134 |
return; |
6134 |
return; |
| 6135 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
6135 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 6136 |
VT == MVT::v1f64) { |
6136 |
VT == MVT::v1f64) { |
| 6137 |
SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); |
6137 |
SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); |
| 6138 |
return; |
6138 |
return; |
| 6139 |
} |
6139 |
} |
| 6140 |
break; |
6140 |
break; |
| 6141 |
} |
6141 |
} |
| 6142 |
case AArch64ISD::ST2post: { |
6142 |
case AArch64ISD::ST2post: { |
| 6143 |
VT = Node->getOperand(1).getValueType(); |
6143 |
VT = Node->getOperand(1).getValueType(); |
| 6144 |
if (VT == MVT::v8i8) { |
6144 |
if (VT == MVT::v8i8) { |
| 6145 |
SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); |
6145 |
SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); |
| 6146 |
return; |
6146 |
return; |
| 6147 |
} else if (VT == MVT::v16i8) { |
6147 |
} else if (VT == MVT::v16i8) { |
| 6148 |
SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); |
6148 |
SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); |
| 6149 |
return; |
6149 |
return; |
| 6150 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
6150 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 6151 |
SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); |
6151 |
SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); |
| 6152 |
return; |
6152 |
return; |
| 6153 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
6153 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 6154 |
SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); |
6154 |
SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); |
| 6155 |
return; |
6155 |
return; |
| 6156 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
6156 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 6157 |
SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); |
6157 |
SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); |
| 6158 |
return; |
6158 |
return; |
| 6159 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
6159 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 6160 |
SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); |
6160 |
SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); |
| 6161 |
return; |
6161 |
return; |
| 6162 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
6162 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 6163 |
SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); |
6163 |
SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); |
| 6164 |
return; |
6164 |
return; |
| 6165 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
6165 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 6166 |
SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); |
6166 |
SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); |
| 6167 |
return; |
6167 |
return; |
| 6168 |
} |
6168 |
} |
| 6169 |
break; |
6169 |
break; |
| 6170 |
} |
6170 |
} |
| 6171 |
case AArch64ISD::ST3post: { |
6171 |
case AArch64ISD::ST3post: { |
| 6172 |
VT = Node->getOperand(1).getValueType(); |
6172 |
VT = Node->getOperand(1).getValueType(); |
| 6173 |
if (VT == MVT::v8i8) { |
6173 |
if (VT == MVT::v8i8) { |
| 6174 |
SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); |
6174 |
SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); |
| 6175 |
return; |
6175 |
return; |
| 6176 |
} else if (VT == MVT::v16i8) { |
6176 |
} else if (VT == MVT::v16i8) { |
| 6177 |
SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); |
6177 |
SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); |
| 6178 |
return; |
6178 |
return; |
| 6179 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
6179 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 6180 |
SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); |
6180 |
SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); |
| 6181 |
return; |
6181 |
return; |
| 6182 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
6182 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 6183 |
SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); |
6183 |
SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); |
| 6184 |
return; |
6184 |
return; |
| 6185 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
6185 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 6186 |
SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); |
6186 |
SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); |
| 6187 |
return; |
6187 |
return; |
| 6188 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
6188 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 6189 |
SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); |
6189 |
SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); |
| 6190 |
return; |
6190 |
return; |
| 6191 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
6191 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 6192 |
SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); |
6192 |
SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); |
| 6193 |
return; |
6193 |
return; |
| 6194 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
6194 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 6195 |
SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); |
6195 |
SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); |
| 6196 |
return; |
6196 |
return; |
| 6197 |
} |
6197 |
} |
| 6198 |
break; |
6198 |
break; |
| 6199 |
} |
6199 |
} |
| 6200 |
case AArch64ISD::ST4post: { |
6200 |
case AArch64ISD::ST4post: { |
| 6201 |
VT = Node->getOperand(1).getValueType(); |
6201 |
VT = Node->getOperand(1).getValueType(); |
| 6202 |
if (VT == MVT::v8i8) { |
6202 |
if (VT == MVT::v8i8) { |
| 6203 |
SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); |
6203 |
SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); |
| 6204 |
return; |
6204 |
return; |
| 6205 |
} else if (VT == MVT::v16i8) { |
6205 |
} else if (VT == MVT::v16i8) { |
| 6206 |
SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); |
6206 |
SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); |
| 6207 |
return; |
6207 |
return; |
| 6208 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
6208 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 6209 |
SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); |
6209 |
SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); |
| 6210 |
return; |
6210 |
return; |
| 6211 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
6211 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 6212 |
SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); |
6212 |
SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); |
| 6213 |
return; |
6213 |
return; |
| 6214 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
6214 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 6215 |
SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); |
6215 |
SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); |
| 6216 |
return; |
6216 |
return; |
| 6217 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
6217 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 6218 |
SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); |
6218 |
SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); |
| 6219 |
return; |
6219 |
return; |
| 6220 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
6220 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 6221 |
SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); |
6221 |
SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); |
| 6222 |
return; |
6222 |
return; |
| 6223 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
6223 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 6224 |
SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); |
6224 |
SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); |
| 6225 |
return; |
6225 |
return; |
| 6226 |
} |
6226 |
} |
| 6227 |
break; |
6227 |
break; |
| 6228 |
} |
6228 |
} |
| 6229 |
case AArch64ISD::ST1x2post: { |
6229 |
case AArch64ISD::ST1x2post: { |
| 6230 |
VT = Node->getOperand(1).getValueType(); |
6230 |
VT = Node->getOperand(1).getValueType(); |
| 6231 |
if (VT == MVT::v8i8) { |
6231 |
if (VT == MVT::v8i8) { |
| 6232 |
SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); |
6232 |
SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); |
| 6233 |
return; |
6233 |
return; |
| 6234 |
} else if (VT == MVT::v16i8) { |
6234 |
} else if (VT == MVT::v16i8) { |
| 6235 |
SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); |
6235 |
SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); |
| 6236 |
return; |
6236 |
return; |
| 6237 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
6237 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 6238 |
SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); |
6238 |
SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); |
| 6239 |
return; |
6239 |
return; |
| 6240 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
6240 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 6241 |
SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); |
6241 |
SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); |
| 6242 |
return; |
6242 |
return; |
| 6243 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
6243 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 6244 |
SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); |
6244 |
SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); |
| 6245 |
return; |
6245 |
return; |
| 6246 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
6246 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 6247 |
SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); |
6247 |
SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); |
| 6248 |
return; |
6248 |
return; |
| 6249 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
6249 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 6250 |
SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); |
6250 |
SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); |
| 6251 |
return; |
6251 |
return; |
| 6252 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
6252 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 6253 |
SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); |
6253 |
SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); |
| 6254 |
return; |
6254 |
return; |
| 6255 |
} |
6255 |
} |
| 6256 |
break; |
6256 |
break; |
| 6257 |
} |
6257 |
} |
| 6258 |
case AArch64ISD::ST1x3post: { |
6258 |
case AArch64ISD::ST1x3post: { |
| 6259 |
VT = Node->getOperand(1).getValueType(); |
6259 |
VT = Node->getOperand(1).getValueType(); |
| 6260 |
if (VT == MVT::v8i8) { |
6260 |
if (VT == MVT::v8i8) { |
| 6261 |
SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); |
6261 |
SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); |
| 6262 |
return; |
6262 |
return; |
| 6263 |
} else if (VT == MVT::v16i8) { |
6263 |
} else if (VT == MVT::v16i8) { |
| 6264 |
SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); |
6264 |
SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); |
| 6265 |
return; |
6265 |
return; |
| 6266 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
6266 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 6267 |
SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); |
6267 |
SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); |
| 6268 |
return; |
6268 |
return; |
| 6269 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) { |
6269 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) { |
| 6270 |
SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); |
6270 |
SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); |
| 6271 |
return; |
6271 |
return; |
| 6272 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
6272 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 6273 |
SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); |
6273 |
SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); |
| 6274 |
return; |
6274 |
return; |
| 6275 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
6275 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 6276 |
SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); |
6276 |
SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); |
| 6277 |
return; |
6277 |
return; |
| 6278 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
6278 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 6279 |
SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); |
6279 |
SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); |
| 6280 |
return; |
6280 |
return; |
| 6281 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
6281 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 6282 |
SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); |
6282 |
SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); |
| 6283 |
return; |
6283 |
return; |
| 6284 |
} |
6284 |
} |
| 6285 |
break; |
6285 |
break; |
| 6286 |
} |
6286 |
} |
| 6287 |
case AArch64ISD::ST1x4post: { |
6287 |
case AArch64ISD::ST1x4post: { |
| 6288 |
VT = Node->getOperand(1).getValueType(); |
6288 |
VT = Node->getOperand(1).getValueType(); |
| 6289 |
if (VT == MVT::v8i8) { |
6289 |
if (VT == MVT::v8i8) { |
| 6290 |
SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); |
6290 |
SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); |
| 6291 |
return; |
6291 |
return; |
| 6292 |
} else if (VT == MVT::v16i8) { |
6292 |
} else if (VT == MVT::v16i8) { |
| 6293 |
SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); |
6293 |
SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); |
| 6294 |
return; |
6294 |
return; |
| 6295 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
6295 |
} else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { |
| 6296 |
SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); |
6296 |
SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); |
| 6297 |
return; |
6297 |
return; |
| 6298 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
6298 |
} else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { |
| 6299 |
SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); |
6299 |
SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); |
| 6300 |
return; |
6300 |
return; |
| 6301 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
6301 |
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) { |
| 6302 |
SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); |
6302 |
SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); |
| 6303 |
return; |
6303 |
return; |
| 6304 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
6304 |
} else if (VT == MVT::v4i32 || VT == MVT::v4f32) { |
| 6305 |
SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); |
6305 |
SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); |
| 6306 |
return; |
6306 |
return; |
| 6307 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
6307 |
} else if (VT == MVT::v1i64 || VT == MVT::v1f64) { |
| 6308 |
SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); |
6308 |
SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); |
| 6309 |
return; |
6309 |
return; |
| 6310 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
6310 |
} else if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
| 6311 |
SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); |
6311 |
SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); |
| 6312 |
return; |
6312 |
return; |
| 6313 |
} |
6313 |
} |
| 6314 |
break; |
6314 |
break; |
| 6315 |
} |
6315 |
} |
| 6316 |
case AArch64ISD::ST2LANEpost: { |
6316 |
case AArch64ISD::ST2LANEpost: { |
| 6317 |
VT = Node->getOperand(1).getValueType(); |
6317 |
VT = Node->getOperand(1).getValueType(); |
| 6318 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
6318 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 6319 |
SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); |
6319 |
SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); |
| 6320 |
return; |
6320 |
return; |
| 6321 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
6321 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 6322 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
6322 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 6323 |
SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); |
6323 |
SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); |
| 6324 |
return; |
6324 |
return; |
| 6325 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
6325 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 6326 |
VT == MVT::v2f32) { |
6326 |
VT == MVT::v2f32) { |
| 6327 |
SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); |
6327 |
SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); |
| 6328 |
return; |
6328 |
return; |
| 6329 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
6329 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 6330 |
VT == MVT::v1f64) { |
6330 |
VT == MVT::v1f64) { |
| 6331 |
SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); |
6331 |
SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); |
| 6332 |
return; |
6332 |
return; |
| 6333 |
} |
6333 |
} |
| 6334 |
break; |
6334 |
break; |
| 6335 |
} |
6335 |
} |
| 6336 |
case AArch64ISD::ST3LANEpost: { |
6336 |
case AArch64ISD::ST3LANEpost: { |
| 6337 |
VT = Node->getOperand(1).getValueType(); |
6337 |
VT = Node->getOperand(1).getValueType(); |
| 6338 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
6338 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 6339 |
SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); |
6339 |
SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); |
| 6340 |
return; |
6340 |
return; |
| 6341 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
6341 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 6342 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
6342 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 6343 |
SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); |
6343 |
SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); |
| 6344 |
return; |
6344 |
return; |
| 6345 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
6345 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 6346 |
VT == MVT::v2f32) { |
6346 |
VT == MVT::v2f32) { |
| 6347 |
SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); |
6347 |
SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); |
| 6348 |
return; |
6348 |
return; |
| 6349 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
6349 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 6350 |
VT == MVT::v1f64) { |
6350 |
VT == MVT::v1f64) { |
| 6351 |
SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); |
6351 |
SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); |
| 6352 |
return; |
6352 |
return; |
| 6353 |
} |
6353 |
} |
| 6354 |
break; |
6354 |
break; |
| 6355 |
} |
6355 |
} |
| 6356 |
case AArch64ISD::ST4LANEpost: { |
6356 |
case AArch64ISD::ST4LANEpost: { |
| 6357 |
VT = Node->getOperand(1).getValueType(); |
6357 |
VT = Node->getOperand(1).getValueType(); |
| 6358 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
6358 |
if (VT == MVT::v16i8 || VT == MVT::v8i8) { |
| 6359 |
SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); |
6359 |
SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); |
| 6360 |
return; |
6360 |
return; |
| 6361 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
6361 |
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || |
| 6362 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
6362 |
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { |
| 6363 |
SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); |
6363 |
SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); |
| 6364 |
return; |
6364 |
return; |
| 6365 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
6365 |
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || |
| 6366 |
VT == MVT::v2f32) { |
6366 |
VT == MVT::v2f32) { |
| 6367 |
SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); |
6367 |
SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); |
| 6368 |
return; |
6368 |
return; |
| 6369 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
6369 |
} else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || |
| 6370 |
VT == MVT::v1f64) { |
6370 |
VT == MVT::v1f64) { |
| 6371 |
SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); |
6371 |
SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); |
| 6372 |
return; |
6372 |
return; |
| 6373 |
} |
6373 |
} |
| 6374 |
break; |
6374 |
break; |
| 6375 |
} |
6375 |
} |
| 6376 |
case AArch64ISD::SVE_LD2_MERGE_ZERO: { |
6376 |
case AArch64ISD::SVE_LD2_MERGE_ZERO: { |
| 6377 |
if (VT == MVT::nxv16i8) { |
6377 |
if (VT == MVT::nxv16i8) { |
| 6378 |
SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B); |
6378 |
SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B); |
| 6379 |
return; |
6379 |
return; |
| 6380 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
6380 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 6381 |
VT == MVT::nxv8bf16) { |
6381 |
VT == MVT::nxv8bf16) { |
| 6382 |
SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H); |
6382 |
SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H); |
| 6383 |
return; |
6383 |
return; |
| 6384 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
6384 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 6385 |
SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W); |
6385 |
SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W); |
| 6386 |
return; |
6386 |
return; |
| 6387 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
6387 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 6388 |
SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D); |
6388 |
SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D); |
| 6389 |
return; |
6389 |
return; |
| 6390 |
} |
6390 |
} |
| 6391 |
break; |
6391 |
break; |
| 6392 |
} |
6392 |
} |
| 6393 |
case AArch64ISD::SVE_LD3_MERGE_ZERO: { |
6393 |
case AArch64ISD::SVE_LD3_MERGE_ZERO: { |
| 6394 |
if (VT == MVT::nxv16i8) { |
6394 |
if (VT == MVT::nxv16i8) { |
| 6395 |
SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B); |
6395 |
SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B); |
| 6396 |
return; |
6396 |
return; |
| 6397 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
6397 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 6398 |
VT == MVT::nxv8bf16) { |
6398 |
VT == MVT::nxv8bf16) { |
| 6399 |
SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H); |
6399 |
SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H); |
| 6400 |
return; |
6400 |
return; |
| 6401 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
6401 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 6402 |
SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W); |
6402 |
SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W); |
| 6403 |
return; |
6403 |
return; |
| 6404 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
6404 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 6405 |
SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D); |
6405 |
SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D); |
| 6406 |
return; |
6406 |
return; |
| 6407 |
} |
6407 |
} |
| 6408 |
break; |
6408 |
break; |
| 6409 |
} |
6409 |
} |
| 6410 |
case AArch64ISD::SVE_LD4_MERGE_ZERO: { |
6410 |
case AArch64ISD::SVE_LD4_MERGE_ZERO: { |
| 6411 |
if (VT == MVT::nxv16i8) { |
6411 |
if (VT == MVT::nxv16i8) { |
| 6412 |
SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B); |
6412 |
SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B); |
| 6413 |
return; |
6413 |
return; |
| 6414 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
6414 |
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || |
| 6415 |
VT == MVT::nxv8bf16) { |
6415 |
VT == MVT::nxv8bf16) { |
| 6416 |
SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H); |
6416 |
SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H); |
| 6417 |
return; |
6417 |
return; |
| 6418 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
6418 |
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { |
| 6419 |
SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W); |
6419 |
SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W); |
| 6420 |
return; |
6420 |
return; |
| 6421 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
6421 |
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { |
| 6422 |
SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D); |
6422 |
SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D); |
| 6423 |
return; |
6423 |
return; |
| 6424 |
} |
6424 |
} |
| 6425 |
break; |
6425 |
break; |
| 6426 |
} |
6426 |
} |
| 6427 |
} |
6427 |
} |
| 6428 |
|
6428 |
|
| 6429 |
// Select the default instruction |
6429 |
// Select the default instruction |
| 6430 |
SelectCode(Node); |
6430 |
SelectCode(Node); |
| 6431 |
} |
6431 |
} |
| 6432 |
|
6432 |
|
| 6433 |
/// createAArch64ISelDag - This pass converts a legalized DAG into an |
6433 |
/// createAArch64ISelDag - This pass converts a legalized DAG into an |
| 6434 |
/// AArch64-specific DAG, ready for instruction scheduling. Returns a newly allocated AArch64DAGToDAGISel constructed from \p TM and \p OptLevel. |
6434 |
/// AArch64-specific DAG, ready for instruction scheduling. Returns a newly allocated AArch64DAGToDAGISel constructed from \p TM and \p OptLevel. |
| 6435 |
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM, |
6435 |
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM, |
| 6436 |
CodeGenOpt::Level OptLevel) { |
6436 |
CodeGenOpt::Level OptLevel) { |
| 6437 |
return new AArch64DAGToDAGISel(TM, OptLevel); |
6437 |
return new AArch64DAGToDAGISel(TM, OptLevel); |
| 6438 |
} |
6438 |
} |
| 6439 |
|
6439 |
|
| 6440 |
/// When \p PredVT is a scalable vector predicate in the form |
6440 |
/// When \p PredVT is a scalable vector predicate in the form |
| 6441 |
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of |
6441 |
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of |
| 6442 |
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting |
6442 |
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting |
| 6443 |
/// structured vectors (NumVec >1), the output data type is |
6443 |
/// structured vectors (NumVec >1), the output data type is |
| 6444 |
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input |
6444 |
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input |
| 6445 |
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid |
6445 |
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid |
| 6446 |
/// EVT. Only nxv16i1, nxv8i1, nxv4i1 and nxv2i1 are accepted, and \p NumVec is asserted to be in [1, 4]. |
6446 |
/// EVT. Only nxv16i1, nxv8i1, nxv4i1 and nxv2i1 are accepted, and \p NumVec is asserted to be in [1, 4]. |
| 6447 |
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, |
6447 |
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, |
| 6448 |
unsigned NumVec) { |
6448 |
unsigned NumVec) { |
| 6449 |
assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors."); |
6449 |
assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors."); |
| 6450 |
if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1) |
6450 |
if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1) |
| 6451 |
return EVT(); |
6451 |
return EVT(); |
| 6452 |
|
6452 |
|
| 6453 |
if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 && |
6453 |
if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 && |
| 6454 |
PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1) |
6454 |
PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1) |
| 6455 |
return EVT(); |
6455 |
return EVT(); |
| 6456 |
|
6456 |
|
| 6457 |
ElementCount EC = PredVT.getVectorElementCount(); |
6457 |
ElementCount EC = PredVT.getVectorElementCount(); |
| 6458 |
EVT ScalarVT = |
6458 |
EVT ScalarVT = |
| 6459 |
EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue()); |
6459 |
EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue()); |
| 6460 |
EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec); |
6460 |
EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec); |
| 6461 |
|
6461 |
|
| 6462 |
return MemVT; |
6462 |
return MemVT; |
| 6463 |
} |
6463 |
} |
| 6464 |
|
6464 |
|
| 6465 |
/// Return the EVT of the data associated to a memory operation in \p |
6465 |
/// Return the EVT of the data associated to a memory operation in \p |
| 6466 |
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT. |
6466 |
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT. |
| 6467 |
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { |
6467 |
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { |
| 6468 |
if (isa(Root)) |
6468 |
if (isa(Root)) |
| 6469 |
return cast(Root)->getMemoryVT(); |
6469 |
return cast(Root)->getMemoryVT(); |
| 6470 |
|
6470 |
|
| 6471 |
if (isa(Root)) |
6471 |
if (isa(Root)) |
| 6472 |
return cast(Root)->getMemoryVT(); |
6472 |
return cast(Root)->getMemoryVT(); |
| 6473 |
|
6473 |
|
| 6474 |
const unsigned Opcode = Root->getOpcode(); |
6474 |
const unsigned Opcode = Root->getOpcode(); |
| 6475 |
// For custom ISD nodes, we have to look at them individually to extract the |
6475 |
// For custom ISD nodes, we have to look at them individually to extract the |
| 6476 |
// type of the data moved to/from memory. |
6476 |
// type of the data moved to/from memory. |
| 6477 |
switch (Opcode) { |
6477 |
switch (Opcode) { |
| 6478 |
case AArch64ISD::LD1_MERGE_ZERO: |
6478 |
case AArch64ISD::LD1_MERGE_ZERO: |
| 6479 |
case AArch64ISD::LD1S_MERGE_ZERO: |
6479 |
case AArch64ISD::LD1S_MERGE_ZERO: |
| 6480 |
case AArch64ISD::LDNF1_MERGE_ZERO: |
6480 |
case AArch64ISD::LDNF1_MERGE_ZERO: |
| 6481 |
case AArch64ISD::LDNF1S_MERGE_ZERO: |
6481 |
case AArch64ISD::LDNF1S_MERGE_ZERO: |
| 6482 |
return cast(Root->getOperand(3))->getVT(); |
6482 |
return cast(Root->getOperand(3))->getVT(); |
| 6483 |
case AArch64ISD::ST1_PRED: |
6483 |
case AArch64ISD::ST1_PRED: |
| 6484 |
return cast(Root->getOperand(4))->getVT(); |
6484 |
return cast(Root->getOperand(4))->getVT(); |
| 6485 |
case AArch64ISD::SVE_LD2_MERGE_ZERO: |
6485 |
case AArch64ISD::SVE_LD2_MERGE_ZERO: |
| 6486 |
return getPackedVectorTypeFromPredicateType( |
6486 |
return getPackedVectorTypeFromPredicateType( |
| 6487 |
Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2); |
6487 |
Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2); |
| 6488 |
case AArch64ISD::SVE_LD3_MERGE_ZERO: |
6488 |
case AArch64ISD::SVE_LD3_MERGE_ZERO: |
| 6489 |
return getPackedVectorTypeFromPredicateType( |
6489 |
return getPackedVectorTypeFromPredicateType( |
| 6490 |
Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3); |
6490 |
Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3); |
| 6491 |
case AArch64ISD::SVE_LD4_MERGE_ZERO: |
6491 |
case AArch64ISD::SVE_LD4_MERGE_ZERO: |
| 6492 |
return getPackedVectorTypeFromPredicateType( |
6492 |
return getPackedVectorTypeFromPredicateType( |
| 6493 |
Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4); |
6493 |
Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4); |
| 6494 |
default: |
6494 |
default: |
| 6495 |
break; |
6495 |
break; |
| 6496 |
} |
6496 |
} |
| 6497 |
|
6497 |
|
| 6498 |
if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN) |
6498 |
if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN) |
| 6499 |
return EVT(); |
6499 |
return EVT(); |
| 6500 |
|
6500 |
|
| 6501 |
switch (cast(Root->getOperand(1))->getZExtValue()) { |
6501 |
switch (cast(Root->getOperand(1))->getZExtValue()) { |
| 6502 |
default: |
6502 |
default: |
| 6503 |
return EVT(); |
6503 |
return EVT(); |
| 6504 |
case Intrinsic::aarch64_sme_ldr: |
6504 |
case Intrinsic::aarch64_sme_ldr: |
| 6505 |
case Intrinsic::aarch64_sme_str: |
6505 |
case Intrinsic::aarch64_sme_str: |
| 6506 |
return MVT::nxv16i8; |
6506 |
return MVT::nxv16i8; |
| 6507 |
case Intrinsic::aarch64_sve_prf: |
6507 |
case Intrinsic::aarch64_sve_prf: |
| 6508 |
// We are using an SVE prefetch intrinsic. Type must be inferred from the |
6508 |
// We are using an SVE prefetch intrinsic. Type must be inferred from the |
| 6509 |
// width of the predicate. |
6509 |
// width of the predicate. |
| 6510 |
return getPackedVectorTypeFromPredicateType( |
6510 |
return getPackedVectorTypeFromPredicateType( |
| 6511 |
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1); |
6511 |
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1); |
| 6512 |
case Intrinsic::aarch64_sve_ld2_sret: |
6512 |
case Intrinsic::aarch64_sve_ld2_sret: |
| 6513 |
return getPackedVectorTypeFromPredicateType( |
6513 |
return getPackedVectorTypeFromPredicateType( |
| 6514 |
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2); |
6514 |
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2); |
| 6515 |
case Intrinsic::aarch64_sve_ld3_sret: |
6515 |
case Intrinsic::aarch64_sve_ld3_sret: |
| 6516 |
return getPackedVectorTypeFromPredicateType( |
6516 |
return getPackedVectorTypeFromPredicateType( |
| 6517 |
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3); |
6517 |
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3); |
| 6518 |
case Intrinsic::aarch64_sve_ld4_sret: |
6518 |
case Intrinsic::aarch64_sve_ld4_sret: |
| 6519 |
return getPackedVectorTypeFromPredicateType( |
6519 |
return getPackedVectorTypeFromPredicateType( |
| 6520 |
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4); |
6520 |
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4); |
| 6521 |
} |
6521 |
} |
| 6522 |
} |
6522 |
} |
| 6523 |
|
6523 |
|
| 6524 |
/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode: |
6524 |
/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode: |
| 6525 |
/// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max |
6525 |
/// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max |
| 6526 |
/// where Root is the memory access using N for its address. |
6526 |
/// where Root is the memory access using N for its address. |
| 6527 |
template |
6527 |
template |
| 6528 |
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, |
6528 |
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, |
| 6529 |
SDValue &Base, |
6529 |
SDValue &Base, |
| 6530 |
SDValue &OffImm) { |
6530 |
SDValue &OffImm) { |
| 6531 |
const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root); |
6531 |
const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root); |
| 6532 |
const DataLayout &DL = CurDAG->getDataLayout(); |
6532 |
const DataLayout &DL = CurDAG->getDataLayout(); |
| 6533 |
const MachineFrameInfo &MFI = MF->getFrameInfo(); |
6533 |
const MachineFrameInfo &MFI = MF->getFrameInfo(); |
| 6534 |
|
6534 |
|
| 6535 |
if (N.getOpcode() == ISD::FrameIndex) { |
6535 |
if (N.getOpcode() == ISD::FrameIndex) { |
| 6536 |
int FI = cast(N)->getIndex(); |
6536 |
int FI = cast(N)->getIndex(); |
| 6537 |
// We can only encode VL scaled offsets, so only fold in frame indexes |
6537 |
// We can only encode VL scaled offsets, so only fold in frame indexes |
| 6538 |
// referencing SVE objects. |
6538 |
// referencing SVE objects. |
| 6539 |
if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { |
6539 |
if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { |
| 6540 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
6540 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| 6541 |
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); |
6541 |
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); |
| 6542 |
return true; |
6542 |
return true; |
| 6543 |
} |
6543 |
} |
| 6544 |
|
6544 |
|
| 6545 |
return false; |
6545 |
return false; |
| 6546 |
} |
6546 |
} |
| 6547 |
|
6547 |
|
| 6548 |
if (MemVT == EVT()) |
6548 |
if (MemVT == EVT()) |
| 6549 |
return false; |
6549 |
return false; |
| 6550 |
|
6550 |
|
| 6551 |
if (N.getOpcode() != ISD::ADD) |
6551 |
if (N.getOpcode() != ISD::ADD) |
| 6552 |
return false; |
6552 |
return false; |
| 6553 |
|
6553 |
|
| 6554 |
SDValue VScale = N.getOperand(1); |
6554 |
SDValue VScale = N.getOperand(1); |
| 6555 |
if (VScale.getOpcode() != ISD::VSCALE) |
6555 |
if (VScale.getOpcode() != ISD::VSCALE) |
| 6556 |
return false; |
6556 |
return false; |
| 6557 |
|
6557 |
|
| 6558 |
TypeSize TS = MemVT.getSizeInBits(); |
6558 |
TypeSize TS = MemVT.getSizeInBits(); |
| 6559 |
int64_t MemWidthBytes = static_cast(TS.getKnownMinValue()) / 8; |
6559 |
int64_t MemWidthBytes = static_cast(TS.getKnownMinValue()) / 8; |
| 6560 |
int64_t MulImm = cast(VScale.getOperand(0))->getSExtValue(); |
6560 |
int64_t MulImm = cast(VScale.getOperand(0))->getSExtValue(); |
| 6561 |
|
6561 |
|
| 6562 |
if ((MulImm % MemWidthBytes) != 0) |
6562 |
if ((MulImm % MemWidthBytes) != 0) |
| 6563 |
return false; |
6563 |
return false; |
| 6564 |
|
6564 |
|
| 6565 |
int64_t Offset = MulImm / MemWidthBytes; |
6565 |
int64_t Offset = MulImm / MemWidthBytes; |
| 6566 |
if (Offset < Min || Offset > Max) |
6566 |
if (Offset < Min || Offset > Max) |
| 6567 |
return false; |
6567 |
return false; |
| 6568 |
|
6568 |
|
| 6569 |
Base = N.getOperand(0); |
6569 |
Base = N.getOperand(0); |
| 6570 |
if (Base.getOpcode() == ISD::FrameIndex) { |
6570 |
if (Base.getOpcode() == ISD::FrameIndex) { |
| 6571 |
int FI = cast(Base)->getIndex(); |
6571 |
int FI = cast(Base)->getIndex(); |
| 6572 |
// We can only encode VL scaled offsets, so only fold in frame indexes |
6572 |
// We can only encode VL scaled offsets, so only fold in frame indexes |
| 6573 |
// referencing SVE objects. |
6573 |
// referencing SVE objects. |
| 6574 |
if (MFI.getStackID(FI) == TargetStackID::ScalableVector) |
6574 |
if (MFI.getStackID(FI) == TargetStackID::ScalableVector) |
| 6575 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
6575 |
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); |
| 6576 |
} |
6576 |
} |
| 6577 |
|
6577 |
|
| 6578 |
OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64); |
6578 |
OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64); |
| 6579 |
return true; |
6579 |
return true; |
| 6580 |
} |
6580 |
} |
| 6581 |
|
6581 |
|
| 6582 |
/// Select register plus register addressing mode for SVE, with scaled |
6582 |
/// Select register plus register addressing mode for SVE, with scaled |
| 6583 |
/// offset. |
6583 |
/// offset. |
| 6584 |
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale, |
6584 |
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale, |
| 6585 |
SDValue &Base, |
6585 |
SDValue &Base, |
| 6586 |
SDValue &Offset) { |
6586 |
SDValue &Offset) { |
| 6587 |
if (N.getOpcode() != ISD::ADD) |
6587 |
if (N.getOpcode() != ISD::ADD) |
| 6588 |
return false; |
6588 |
return false; |
| 6589 |
|
6589 |
|
| 6590 |
// Process an ADD node. |
6590 |
// Process an ADD node. |
| 6591 |
const SDValue LHS = N.getOperand(0); |
6591 |
const SDValue LHS = N.getOperand(0); |
| 6592 |
const SDValue RHS = N.getOperand(1); |
6592 |
const SDValue RHS = N.getOperand(1); |
| 6593 |
|
6593 |
|
| 6594 |
// 8 bit data does not come with the SHL node, so it is treated |
6594 |
// 8 bit data does not come with the SHL node, so it is treated |
| 6595 |
// separately. |
6595 |
// separately. |
| 6596 |
if (Scale == 0) { |
6596 |
if (Scale == 0) { |
| 6597 |
Base = LHS; |
6597 |
Base = LHS; |
| 6598 |
Offset = RHS; |
6598 |
Offset = RHS; |
| 6599 |
return true; |
6599 |
return true; |
| 6600 |
} |
6600 |
} |
| 6601 |
|
6601 |
|
| 6602 |
if (auto C = dyn_cast(RHS)) { |
6602 |
if (auto C = dyn_cast(RHS)) { |
| 6603 |
int64_t ImmOff = C->getSExtValue(); |
6603 |
int64_t ImmOff = C->getSExtValue(); |
| 6604 |
unsigned Size = 1 << Scale; |
6604 |
unsigned Size = 1 << Scale; |
| 6605 |
|
6605 |
|
| 6606 |
// To use the reg+reg addressing mode, the immediate must be a multiple of |
6606 |
// To use the reg+reg addressing mode, the immediate must be a multiple of |
| 6607 |
// the vector element's byte size. |
6607 |
// the vector element's byte size. |
| 6608 |
if (ImmOff % Size) |
6608 |
if (ImmOff % Size) |
| 6609 |
return false; |
6609 |
return false; |
| 6610 |
|
6610 |
|
| 6611 |
SDLoc DL(N); |
6611 |
SDLoc DL(N); |
| 6612 |
Base = LHS; |
6612 |
Base = LHS; |
| 6613 |
Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64); |
6613 |
Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64); |
| 6614 |
SDValue Ops[] = {Offset}; |
6614 |
SDValue Ops[] = {Offset}; |
| 6615 |
SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); |
6615 |
SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); |
| 6616 |
Offset = SDValue(MI, 0); |
6616 |
Offset = SDValue(MI, 0); |
| 6617 |
return true; |
6617 |
return true; |
| 6618 |
} |
6618 |
} |
| 6619 |
|
6619 |
|
| 6620 |
// Check if the RHS is a shift node with a constant. |
6620 |
// Check if the RHS is a shift node with a constant. |
| 6621 |
if (RHS.getOpcode() != ISD::SHL) |
6621 |
if (RHS.getOpcode() != ISD::SHL) |
| 6622 |
return false; |
6622 |
return false; |
| 6623 |
|
6623 |
|
| 6624 |
const SDValue ShiftRHS = RHS.getOperand(1); |
6624 |
const SDValue ShiftRHS = RHS.getOperand(1); |
| 6625 |
if (auto *C = dyn_cast(ShiftRHS)) |
6625 |
if (auto *C = dyn_cast(ShiftRHS)) |
| 6626 |
if (C->getZExtValue() == Scale) { |
6626 |
if (C->getZExtValue() == Scale) { |
| 6627 |
Base = LHS; |
6627 |
Base = LHS; |
| 6628 |
Offset = RHS.getOperand(0); |
6628 |
Offset = RHS.getOperand(0); |
| 6629 |
return true; |
6629 |
return true; |
| 6630 |
} |
6630 |
} |
| 6631 |
|
6631 |
|
| 6632 |
return false; |
6632 |
return false; |
| 6633 |
} |
6633 |
} |
| 6634 |
|
6634 |
|
| 6635 |
bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) { |
6635 |
bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) { |
| 6636 |
const AArch64TargetLowering *TLI = |
6636 |
const AArch64TargetLowering *TLI = |
| 6637 |
static_cast(getTargetLowering()); |
6637 |
static_cast(getTargetLowering()); |
| 6638 |
|
6638 |
|
| 6639 |
return TLI->isAllActivePredicate(*CurDAG, N); |
6639 |
return TLI->isAllActivePredicate(*CurDAG, N); |
| 6640 |
} |
6640 |
} |
| 6641 |
|
6641 |
|
| 6642 |
bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) { |
6642 |
bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) { |
| 6643 |
EVT VT = N.getValueType(); |
6643 |
EVT VT = N.getValueType(); |
| 6644 |
return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1; |
6644 |
return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1; |
| 6645 |
} |
6645 |
} |
| 6646 |
|
6646 |
|
| 6647 |
bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize, |
6647 |
bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize, |
| 6648 |
SDValue &Base, SDValue &Offset, |
6648 |
SDValue &Base, SDValue &Offset, |
| 6649 |
unsigned Scale) { |
6649 |
unsigned Scale) { |
| 6650 |
// Try to untangle an ADD node into a 'reg + offset' |
6650 |
// Try to untangle an ADD node into a 'reg + offset' |
| 6651 |
if (N.getOpcode() == ISD::ADD) |
6651 |
if (N.getOpcode() == ISD::ADD) |
| 6652 |
if (auto C = dyn_cast(N.getOperand(1))) { |
6652 |
if (auto C = dyn_cast(N.getOperand(1))) { |
| 6653 |
int64_t ImmOff = C->getSExtValue(); |
6653 |
int64_t ImmOff = C->getSExtValue(); |
| 6654 |
if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) { |
6654 |
if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) { |
| 6655 |
Base = N.getOperand(0); |
6655 |
Base = N.getOperand(0); |
| 6656 |
Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64); |
6656 |
Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64); |
| 6657 |
return true; |
6657 |
return true; |
| 6658 |
} |
6658 |
} |
| 6659 |
} |
6659 |
} |
| 6660 |
|
6660 |
|
| 6661 |
// By default, just match reg + 0. |
6661 |
// By default, just match reg + 0. |
| 6662 |
Base = N; |
6662 |
Base = N; |
| 6663 |
Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); |
6663 |
Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); |
| 6664 |
return true; |
6664 |
return true; |
| 6665 |
} |
6665 |
} |
| 6666 |
|
6666 |
|